xref: /kernel/linux/linux-5.10/net/ipv4/tcp.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * INET		An implementation of the TCP/IP protocol suite for the LINUX
48c2ecf20Sopenharmony_ci *		operating system.  INET is implemented using the  BSD Socket
58c2ecf20Sopenharmony_ci *		interface as the means of communication with the user level.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci *		Implementation of the Transmission Control Protocol(TCP).
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * Authors:	Ross Biro
108c2ecf20Sopenharmony_ci *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
118c2ecf20Sopenharmony_ci *		Mark Evans, <evansmp@uhura.aston.ac.uk>
128c2ecf20Sopenharmony_ci *		Corey Minyard <wf-rch!minyard@relay.EU.net>
138c2ecf20Sopenharmony_ci *		Florian La Roche, <flla@stud.uni-sb.de>
148c2ecf20Sopenharmony_ci *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
158c2ecf20Sopenharmony_ci *		Linus Torvalds, <torvalds@cs.helsinki.fi>
168c2ecf20Sopenharmony_ci *		Alan Cox, <gw4pts@gw4pts.ampr.org>
178c2ecf20Sopenharmony_ci *		Matthew Dillon, <dillon@apollo.west.oic.com>
188c2ecf20Sopenharmony_ci *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
198c2ecf20Sopenharmony_ci *		Jorge Cwik, <jorge@laser.satlink.net>
208c2ecf20Sopenharmony_ci *
218c2ecf20Sopenharmony_ci * Fixes:
228c2ecf20Sopenharmony_ci *		Alan Cox	:	Numerous verify_area() calls
238c2ecf20Sopenharmony_ci *		Alan Cox	:	Set the ACK bit on a reset
248c2ecf20Sopenharmony_ci *		Alan Cox	:	Stopped it crashing if it closed while
258c2ecf20Sopenharmony_ci *					sk->inuse=1 and was trying to connect
268c2ecf20Sopenharmony_ci *					(tcp_err()).
278c2ecf20Sopenharmony_ci *		Alan Cox	:	All icmp error handling was broken
288c2ecf20Sopenharmony_ci *					pointers passed were wrong and the
298c2ecf20Sopenharmony_ci *					socket was looked up backwards. Nobody
308c2ecf20Sopenharmony_ci *					tested any icmp error code obviously.
318c2ecf20Sopenharmony_ci *		Alan Cox	:	tcp_err() now handled properly. It
328c2ecf20Sopenharmony_ci *					wakes people on errors. poll
338c2ecf20Sopenharmony_ci *					behaves and the icmp error race
348c2ecf20Sopenharmony_ci *					has gone by moving it into sock.c
358c2ecf20Sopenharmony_ci *		Alan Cox	:	tcp_send_reset() fixed to work for
368c2ecf20Sopenharmony_ci *					everything not just packets for
378c2ecf20Sopenharmony_ci *					unknown sockets.
388c2ecf20Sopenharmony_ci *		Alan Cox	:	tcp option processing.
398c2ecf20Sopenharmony_ci *		Alan Cox	:	Reset tweaked (still not 100%) [Had
408c2ecf20Sopenharmony_ci *					syn rule wrong]
418c2ecf20Sopenharmony_ci *		Herp Rosmanith  :	More reset fixes
428c2ecf20Sopenharmony_ci *		Alan Cox	:	No longer acks invalid rst frames.
438c2ecf20Sopenharmony_ci *					Acking any kind of RST is right out.
448c2ecf20Sopenharmony_ci *		Alan Cox	:	Sets an ignore me flag on an rst
458c2ecf20Sopenharmony_ci *					receive otherwise odd bits of prattle
468c2ecf20Sopenharmony_ci *					escape still
478c2ecf20Sopenharmony_ci *		Alan Cox	:	Fixed another acking RST frame bug.
488c2ecf20Sopenharmony_ci *					Should stop LAN workplace lockups.
498c2ecf20Sopenharmony_ci *		Alan Cox	: 	Some tidyups using the new skb list
508c2ecf20Sopenharmony_ci *					facilities
518c2ecf20Sopenharmony_ci *		Alan Cox	:	sk->keepopen now seems to work
528c2ecf20Sopenharmony_ci *		Alan Cox	:	Pulls options out correctly on accepts
538c2ecf20Sopenharmony_ci *		Alan Cox	:	Fixed assorted sk->rqueue->next errors
548c2ecf20Sopenharmony_ci *		Alan Cox	:	PSH doesn't end a TCP read. Switched a
558c2ecf20Sopenharmony_ci *					bit to skb ops.
568c2ecf20Sopenharmony_ci *		Alan Cox	:	Tidied tcp_data to avoid a potential
578c2ecf20Sopenharmony_ci *					nasty.
588c2ecf20Sopenharmony_ci *		Alan Cox	:	Added some better commenting, as the
598c2ecf20Sopenharmony_ci *					tcp is hard to follow
608c2ecf20Sopenharmony_ci *		Alan Cox	:	Removed incorrect check for 20 * psh
618c2ecf20Sopenharmony_ci *	Michael O'Reilly	:	ack < copied bug fix.
628c2ecf20Sopenharmony_ci *	Johannes Stille		:	Misc tcp fixes (not all in yet).
638c2ecf20Sopenharmony_ci *		Alan Cox	:	FIN with no memory -> CRASH
648c2ecf20Sopenharmony_ci *		Alan Cox	:	Added socket option proto entries.
658c2ecf20Sopenharmony_ci *					Also added awareness of them to accept.
668c2ecf20Sopenharmony_ci *		Alan Cox	:	Added TCP options (SOL_TCP)
678c2ecf20Sopenharmony_ci *		Alan Cox	:	Switched wakeup calls to callbacks,
688c2ecf20Sopenharmony_ci *					so the kernel can layer network
698c2ecf20Sopenharmony_ci *					sockets.
708c2ecf20Sopenharmony_ci *		Alan Cox	:	Use ip_tos/ip_ttl settings.
718c2ecf20Sopenharmony_ci *		Alan Cox	:	Handle FIN (more) properly (we hope).
728c2ecf20Sopenharmony_ci *		Alan Cox	:	RST frames sent on unsynchronised
738c2ecf20Sopenharmony_ci *					state ack error.
748c2ecf20Sopenharmony_ci *		Alan Cox	:	Put in missing check for SYN bit.
758c2ecf20Sopenharmony_ci *		Alan Cox	:	Added tcp_select_window() aka NET2E
768c2ecf20Sopenharmony_ci *					window non shrink trick.
778c2ecf20Sopenharmony_ci *		Alan Cox	:	Added a couple of small NET2E timer
788c2ecf20Sopenharmony_ci *					fixes
798c2ecf20Sopenharmony_ci *		Charles Hedrick :	TCP fixes
808c2ecf20Sopenharmony_ci *		Toomas Tamm	:	TCP window fixes
818c2ecf20Sopenharmony_ci *		Alan Cox	:	Small URG fix to rlogin ^C ack fight
828c2ecf20Sopenharmony_ci *		Charles Hedrick	:	Rewrote most of it to actually work
838c2ecf20Sopenharmony_ci *		Linus		:	Rewrote tcp_read() and URG handling
848c2ecf20Sopenharmony_ci *					completely
858c2ecf20Sopenharmony_ci *		Gerhard Koerting:	Fixed some missing timer handling
868c2ecf20Sopenharmony_ci *		Matthew Dillon  :	Reworked TCP machine states as per RFC
878c2ecf20Sopenharmony_ci *		Gerhard Koerting:	PC/TCP workarounds
888c2ecf20Sopenharmony_ci *		Adam Caldwell	:	Assorted timer/timing errors
898c2ecf20Sopenharmony_ci *		Matthew Dillon	:	Fixed another RST bug
908c2ecf20Sopenharmony_ci *		Alan Cox	:	Move to kernel side addressing changes.
918c2ecf20Sopenharmony_ci *		Alan Cox	:	Beginning work on TCP fastpathing
928c2ecf20Sopenharmony_ci *					(not yet usable)
938c2ecf20Sopenharmony_ci *		Arnt Gulbrandsen:	Turbocharged tcp_check() routine.
948c2ecf20Sopenharmony_ci *		Alan Cox	:	TCP fast path debugging
958c2ecf20Sopenharmony_ci *		Alan Cox	:	Window clamping
968c2ecf20Sopenharmony_ci *		Michael Riepe	:	Bug in tcp_check()
978c2ecf20Sopenharmony_ci *		Matt Dillon	:	More TCP improvements and RST bug fixes
988c2ecf20Sopenharmony_ci *		Matt Dillon	:	Yet more small nasties remove from the
998c2ecf20Sopenharmony_ci *					TCP code (Be very nice to this man if
1008c2ecf20Sopenharmony_ci *					tcp finally works 100%) 8)
1018c2ecf20Sopenharmony_ci *		Alan Cox	:	BSD accept semantics.
1028c2ecf20Sopenharmony_ci *		Alan Cox	:	Reset on closedown bug.
1038c2ecf20Sopenharmony_ci *	Peter De Schrijver	:	ENOTCONN check missing in tcp_sendto().
1048c2ecf20Sopenharmony_ci *		Michael Pall	:	Handle poll() after URG properly in
1058c2ecf20Sopenharmony_ci *					all cases.
1068c2ecf20Sopenharmony_ci *		Michael Pall	:	Undo the last fix in tcp_read_urg()
1078c2ecf20Sopenharmony_ci *					(multi URG PUSH broke rlogin).
1088c2ecf20Sopenharmony_ci *		Michael Pall	:	Fix the multi URG PUSH problem in
1098c2ecf20Sopenharmony_ci *					tcp_readable(), poll() after URG
1108c2ecf20Sopenharmony_ci *					works now.
1118c2ecf20Sopenharmony_ci *		Michael Pall	:	recv(...,MSG_OOB) never blocks in the
1128c2ecf20Sopenharmony_ci *					BSD api.
1138c2ecf20Sopenharmony_ci *		Alan Cox	:	Changed the semantics of sk->socket to
1148c2ecf20Sopenharmony_ci *					fix a race and a signal problem with
1158c2ecf20Sopenharmony_ci *					accept() and async I/O.
1168c2ecf20Sopenharmony_ci *		Alan Cox	:	Relaxed the rules on tcp_sendto().
1178c2ecf20Sopenharmony_ci *		Yury Shevchuk	:	Really fixed accept() blocking problem.
1188c2ecf20Sopenharmony_ci *		Craig I. Hagan  :	Allow for BSD compatible TIME_WAIT for
1198c2ecf20Sopenharmony_ci *					clients/servers which listen in on
1208c2ecf20Sopenharmony_ci *					fixed ports.
1218c2ecf20Sopenharmony_ci *		Alan Cox	:	Cleaned the above up and shrank it to
1228c2ecf20Sopenharmony_ci *					a sensible code size.
1238c2ecf20Sopenharmony_ci *		Alan Cox	:	Self connect lockup fix.
1248c2ecf20Sopenharmony_ci *		Alan Cox	:	No connect to multicast.
1258c2ecf20Sopenharmony_ci *		Ross Biro	:	Close unaccepted children on master
1268c2ecf20Sopenharmony_ci *					socket close.
1278c2ecf20Sopenharmony_ci *		Alan Cox	:	Reset tracing code.
1288c2ecf20Sopenharmony_ci *		Alan Cox	:	Spurious resets on shutdown.
1298c2ecf20Sopenharmony_ci *		Alan Cox	:	Giant 15 minute/60 second timer error
1308c2ecf20Sopenharmony_ci *		Alan Cox	:	Small whoops in polling before an
1318c2ecf20Sopenharmony_ci *					accept.
1328c2ecf20Sopenharmony_ci *		Alan Cox	:	Kept the state trace facility since
1338c2ecf20Sopenharmony_ci *					it's handy for debugging.
1348c2ecf20Sopenharmony_ci *		Alan Cox	:	More reset handler fixes.
1358c2ecf20Sopenharmony_ci *		Alan Cox	:	Started rewriting the code based on
1368c2ecf20Sopenharmony_ci *					the RFC's for other useful protocol
1378c2ecf20Sopenharmony_ci *					references see: Comer, KA9Q NOS, and
1388c2ecf20Sopenharmony_ci *					for a reference on the difference
1398c2ecf20Sopenharmony_ci *					between specifications and how BSD
1408c2ecf20Sopenharmony_ci *					works see the 4.4lite source.
1418c2ecf20Sopenharmony_ci *		A.N.Kuznetsov	:	Don't time wait on completion of tidy
1428c2ecf20Sopenharmony_ci *					close.
1438c2ecf20Sopenharmony_ci *		Linus Torvalds	:	Fin/Shutdown & copied_seq changes.
1448c2ecf20Sopenharmony_ci *		Linus Torvalds	:	Fixed BSD port reuse to work first syn
1458c2ecf20Sopenharmony_ci *		Alan Cox	:	Reimplemented timers as per the RFC
1468c2ecf20Sopenharmony_ci *					and using multiple timers for sanity.
1478c2ecf20Sopenharmony_ci *		Alan Cox	:	Small bug fixes, and a lot of new
1488c2ecf20Sopenharmony_ci *					comments.
1498c2ecf20Sopenharmony_ci *		Alan Cox	:	Fixed dual reader crash by locking
1508c2ecf20Sopenharmony_ci *					the buffers (much like datagram.c)
1518c2ecf20Sopenharmony_ci *		Alan Cox	:	Fixed stuck sockets in probe. A probe
1528c2ecf20Sopenharmony_ci *					now gets fed up of retrying without
1538c2ecf20Sopenharmony_ci *					(even a no space) answer.
1548c2ecf20Sopenharmony_ci *		Alan Cox	:	Extracted closing code better
1558c2ecf20Sopenharmony_ci *		Alan Cox	:	Fixed the closing state machine to
1568c2ecf20Sopenharmony_ci *					resemble the RFC.
1578c2ecf20Sopenharmony_ci *		Alan Cox	:	More 'per spec' fixes.
1588c2ecf20Sopenharmony_ci *		Jorge Cwik	:	Even faster checksumming.
1598c2ecf20Sopenharmony_ci *		Alan Cox	:	tcp_data() doesn't ack illegal PSH
1608c2ecf20Sopenharmony_ci *					only frames. At least one pc tcp stack
1618c2ecf20Sopenharmony_ci *					generates them.
1628c2ecf20Sopenharmony_ci *		Alan Cox	:	Cache last socket.
1638c2ecf20Sopenharmony_ci *		Alan Cox	:	Per route irtt.
1648c2ecf20Sopenharmony_ci *		Matt Day	:	poll()->select() match BSD precisely on error
1658c2ecf20Sopenharmony_ci *		Alan Cox	:	New buffers
1668c2ecf20Sopenharmony_ci *		Marc Tamsky	:	Various sk->prot->retransmits and
1678c2ecf20Sopenharmony_ci *					sk->retransmits misupdating fixed.
1688c2ecf20Sopenharmony_ci *					Fixed tcp_write_timeout: stuck close,
1698c2ecf20Sopenharmony_ci *					and TCP syn retries gets used now.
1708c2ecf20Sopenharmony_ci *		Mark Yarvis	:	In tcp_read_wakeup(), don't send an
1718c2ecf20Sopenharmony_ci *					ack if state is TCP_CLOSED.
1728c2ecf20Sopenharmony_ci *		Alan Cox	:	Look up device on a retransmit - routes may
1738c2ecf20Sopenharmony_ci *					change. Doesn't yet cope with MSS shrink right
1748c2ecf20Sopenharmony_ci *					but it's a start!
1758c2ecf20Sopenharmony_ci *		Marc Tamsky	:	Closing in closing fixes.
1768c2ecf20Sopenharmony_ci *		Mike Shaver	:	RFC1122 verifications.
1778c2ecf20Sopenharmony_ci *		Alan Cox	:	rcv_saddr errors.
1788c2ecf20Sopenharmony_ci *		Alan Cox	:	Block double connect().
1798c2ecf20Sopenharmony_ci *		Alan Cox	:	Small hooks for enSKIP.
1808c2ecf20Sopenharmony_ci *		Alexey Kuznetsov:	Path MTU discovery.
1818c2ecf20Sopenharmony_ci *		Alan Cox	:	Support soft errors.
1828c2ecf20Sopenharmony_ci *		Alan Cox	:	Fix MTU discovery pathological case
1838c2ecf20Sopenharmony_ci *					when the remote claims no mtu!
1848c2ecf20Sopenharmony_ci *		Marc Tamsky	:	TCP_CLOSE fix.
1858c2ecf20Sopenharmony_ci *		Colin (G3TNE)	:	Send a reset on syn ack replies in
1868c2ecf20Sopenharmony_ci *					window but wrong (fixes NT lpd problems)
1878c2ecf20Sopenharmony_ci *		Pedro Roque	:	Better TCP window handling, delayed ack.
1888c2ecf20Sopenharmony_ci *		Joerg Reuter	:	No modification of locked buffers in
1898c2ecf20Sopenharmony_ci *					tcp_do_retransmit()
1908c2ecf20Sopenharmony_ci *		Eric Schenk	:	Changed receiver side silly window
1918c2ecf20Sopenharmony_ci *					avoidance algorithm to BSD style
1928c2ecf20Sopenharmony_ci *					algorithm. This doubles throughput
1938c2ecf20Sopenharmony_ci *					against machines running Solaris,
1948c2ecf20Sopenharmony_ci *					and seems to result in general
1958c2ecf20Sopenharmony_ci *					improvement.
1968c2ecf20Sopenharmony_ci *	Stefan Magdalinski	:	adjusted tcp_readable() to fix FIONREAD
1978c2ecf20Sopenharmony_ci *	Willy Konynenberg	:	Transparent proxying support.
1988c2ecf20Sopenharmony_ci *	Mike McLagan		:	Routing by source
1998c2ecf20Sopenharmony_ci *		Keith Owens	:	Do proper merging with partial SKB's in
2008c2ecf20Sopenharmony_ci *					tcp_do_sendmsg to avoid burstiness.
2018c2ecf20Sopenharmony_ci *		Eric Schenk	:	Fix fast close down bug with
2028c2ecf20Sopenharmony_ci *					shutdown() followed by close().
2038c2ecf20Sopenharmony_ci *		Andi Kleen 	:	Make poll agree with SIGIO
2048c2ecf20Sopenharmony_ci *	Salvatore Sanfilippo	:	Support SO_LINGER with linger == 1 and
2058c2ecf20Sopenharmony_ci *					lingertime == 0 (RFC 793 ABORT Call)
2068c2ecf20Sopenharmony_ci *	Hirokazu Takahashi	:	Use copy_from_user() instead of
2078c2ecf20Sopenharmony_ci *					csum_and_copy_from_user() if possible.
2088c2ecf20Sopenharmony_ci *
2098c2ecf20Sopenharmony_ci * Description of States:
2108c2ecf20Sopenharmony_ci *
2118c2ecf20Sopenharmony_ci *	TCP_SYN_SENT		sent a connection request, waiting for ack
2128c2ecf20Sopenharmony_ci *
2138c2ecf20Sopenharmony_ci *	TCP_SYN_RECV		received a connection request, sent ack,
2148c2ecf20Sopenharmony_ci *				waiting for final ack in three-way handshake.
2158c2ecf20Sopenharmony_ci *
2168c2ecf20Sopenharmony_ci *	TCP_ESTABLISHED		connection established
2178c2ecf20Sopenharmony_ci *
2188c2ecf20Sopenharmony_ci *	TCP_FIN_WAIT1		our side has shutdown, waiting to complete
2198c2ecf20Sopenharmony_ci *				transmission of remaining buffered data
2208c2ecf20Sopenharmony_ci *
2218c2ecf20Sopenharmony_ci *	TCP_FIN_WAIT2		all buffered data sent, waiting for remote
2228c2ecf20Sopenharmony_ci *				to shutdown
2238c2ecf20Sopenharmony_ci *
2248c2ecf20Sopenharmony_ci *	TCP_CLOSING		both sides have shutdown but we still have
2258c2ecf20Sopenharmony_ci *				data we have to finish sending
2268c2ecf20Sopenharmony_ci *
2278c2ecf20Sopenharmony_ci *	TCP_TIME_WAIT		timeout to catch resent junk before entering
2288c2ecf20Sopenharmony_ci *				closed, can only be entered from FIN_WAIT2
2298c2ecf20Sopenharmony_ci *				or CLOSING.  Required because the other end
2308c2ecf20Sopenharmony_ci *				may not have gotten our last ACK causing it
2318c2ecf20Sopenharmony_ci *				to retransmit the data packet (which we ignore)
2328c2ecf20Sopenharmony_ci *
2338c2ecf20Sopenharmony_ci *	TCP_CLOSE_WAIT		remote side has shutdown and is waiting for
2348c2ecf20Sopenharmony_ci *				us to finish writing our data and to shutdown
2358c2ecf20Sopenharmony_ci *				(we have to close() to move on to LAST_ACK)
2368c2ecf20Sopenharmony_ci *
2378c2ecf20Sopenharmony_ci *	TCP_LAST_ACK		our side has shutdown after remote has
2388c2ecf20Sopenharmony_ci *				shutdown.  There may still be data in our
2398c2ecf20Sopenharmony_ci *				buffer that we have to finish sending
2408c2ecf20Sopenharmony_ci *
2418c2ecf20Sopenharmony_ci *	TCP_CLOSE		socket is finished
2428c2ecf20Sopenharmony_ci */
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci#define pr_fmt(fmt) "TCP: " fmt
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci#include <crypto/hash.h>
2478c2ecf20Sopenharmony_ci#include <linux/kernel.h>
2488c2ecf20Sopenharmony_ci#include <linux/module.h>
2498c2ecf20Sopenharmony_ci#include <linux/types.h>
2508c2ecf20Sopenharmony_ci#include <linux/fcntl.h>
2518c2ecf20Sopenharmony_ci#include <linux/poll.h>
2528c2ecf20Sopenharmony_ci#include <linux/inet_diag.h>
2538c2ecf20Sopenharmony_ci#include <linux/init.h>
2548c2ecf20Sopenharmony_ci#include <linux/fs.h>
2558c2ecf20Sopenharmony_ci#include <linux/skbuff.h>
2568c2ecf20Sopenharmony_ci#include <linux/scatterlist.h>
2578c2ecf20Sopenharmony_ci#include <linux/splice.h>
2588c2ecf20Sopenharmony_ci#include <linux/net.h>
2598c2ecf20Sopenharmony_ci#include <linux/socket.h>
2608c2ecf20Sopenharmony_ci#include <linux/random.h>
2618c2ecf20Sopenharmony_ci#include <linux/memblock.h>
2628c2ecf20Sopenharmony_ci#include <linux/highmem.h>
2638c2ecf20Sopenharmony_ci#include <linux/swap.h>
2648c2ecf20Sopenharmony_ci#include <linux/cache.h>
2658c2ecf20Sopenharmony_ci#include <linux/err.h>
2668c2ecf20Sopenharmony_ci#include <linux/time.h>
2678c2ecf20Sopenharmony_ci#include <linux/slab.h>
2688c2ecf20Sopenharmony_ci#include <linux/errqueue.h>
2698c2ecf20Sopenharmony_ci#include <linux/static_key.h>
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci#include <net/icmp.h>
2728c2ecf20Sopenharmony_ci#include <net/inet_common.h>
2738c2ecf20Sopenharmony_ci#include <net/tcp.h>
2748c2ecf20Sopenharmony_ci#include <net/mptcp.h>
2758c2ecf20Sopenharmony_ci#include <net/xfrm.h>
2768c2ecf20Sopenharmony_ci#include <net/ip.h>
2778c2ecf20Sopenharmony_ci#include <net/sock.h>
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
2808c2ecf20Sopenharmony_ci#include <asm/ioctls.h>
2818c2ecf20Sopenharmony_ci#include <net/busy_poll.h>
2828c2ecf20Sopenharmony_ci#ifdef CONFIG_LOWPOWER_PROTOCOL
2838c2ecf20Sopenharmony_ci#include <net/lowpower_protocol.h>
2848c2ecf20Sopenharmony_ci#endif /* CONFIG_LOWPOWER_PROTOCOL */
2858c2ecf20Sopenharmony_ci#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
2868c2ecf20Sopenharmony_ci#include <net/nata.h>
2878c2ecf20Sopenharmony_ci#endif
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ciDEFINE_PER_CPU(unsigned int, tcp_orphan_count);
2908c2ecf20Sopenharmony_ciEXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_cilong sysctl_tcp_mem[3] __read_mostly;
2938c2ecf20Sopenharmony_ciEXPORT_SYMBOL(sysctl_tcp_mem);
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ciatomic_long_t tcp_memory_allocated;	/* Current allocated memory. */
2968c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_memory_allocated);
2978c2ecf20Sopenharmony_ci
2988c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_SMC)
2998c2ecf20Sopenharmony_ciDEFINE_STATIC_KEY_FALSE(tcp_have_smc);
3008c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_have_smc);
3018c2ecf20Sopenharmony_ci#endif
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci/*
3048c2ecf20Sopenharmony_ci * Current number of TCP sockets.
3058c2ecf20Sopenharmony_ci */
3068c2ecf20Sopenharmony_cistruct percpu_counter tcp_sockets_allocated;
3078c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_sockets_allocated);
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci/*
3108c2ecf20Sopenharmony_ci * TCP splice context
3118c2ecf20Sopenharmony_ci */
3128c2ecf20Sopenharmony_cistruct tcp_splice_state {
3138c2ecf20Sopenharmony_ci	struct pipe_inode_info *pipe;
3148c2ecf20Sopenharmony_ci	size_t len;
3158c2ecf20Sopenharmony_ci	unsigned int flags;
3168c2ecf20Sopenharmony_ci};
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci/*
3198c2ecf20Sopenharmony_ci * Pressure flag: try to collapse.
3208c2ecf20Sopenharmony_ci * Technical note: it is used by multiple contexts non atomically.
3218c2ecf20Sopenharmony_ci * All the __sk_mem_schedule() is of this nature: accounting
3228c2ecf20Sopenharmony_ci * is strict, actions are advisory and have some latency.
3238c2ecf20Sopenharmony_ci */
3248c2ecf20Sopenharmony_ciunsigned long tcp_memory_pressure __read_mostly;
3258c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_memory_pressure);
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ciDEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
3288c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_rx_skb_cache_key);
3298c2ecf20Sopenharmony_ci
3308c2ecf20Sopenharmony_ciDEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
3318c2ecf20Sopenharmony_ci
3328c2ecf20Sopenharmony_civoid tcp_enter_memory_pressure(struct sock *sk)
3338c2ecf20Sopenharmony_ci{
3348c2ecf20Sopenharmony_ci	unsigned long val;
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci	if (READ_ONCE(tcp_memory_pressure))
3378c2ecf20Sopenharmony_ci		return;
3388c2ecf20Sopenharmony_ci	val = jiffies;
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ci	if (!val)
3418c2ecf20Sopenharmony_ci		val--;
3428c2ecf20Sopenharmony_ci	if (!cmpxchg(&tcp_memory_pressure, 0, val))
3438c2ecf20Sopenharmony_ci		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
3448c2ecf20Sopenharmony_ci}
3458c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_enter_memory_pressure);
3468c2ecf20Sopenharmony_ci
3478c2ecf20Sopenharmony_civoid tcp_leave_memory_pressure(struct sock *sk)
3488c2ecf20Sopenharmony_ci{
3498c2ecf20Sopenharmony_ci	unsigned long val;
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci	if (!READ_ONCE(tcp_memory_pressure))
3528c2ecf20Sopenharmony_ci		return;
3538c2ecf20Sopenharmony_ci	val = xchg(&tcp_memory_pressure, 0);
3548c2ecf20Sopenharmony_ci	if (val)
3558c2ecf20Sopenharmony_ci		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO,
3568c2ecf20Sopenharmony_ci			      jiffies_to_msecs(jiffies - val));
3578c2ecf20Sopenharmony_ci}
3588c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_leave_memory_pressure);
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_ci/* Convert seconds to retransmits based on initial and max timeout */
3618c2ecf20Sopenharmony_cistatic u8 secs_to_retrans(int seconds, int timeout, int rto_max)
3628c2ecf20Sopenharmony_ci{
3638c2ecf20Sopenharmony_ci	u8 res = 0;
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_ci	if (seconds > 0) {
3668c2ecf20Sopenharmony_ci		int period = timeout;
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_ci		res = 1;
3698c2ecf20Sopenharmony_ci		while (seconds > period && res < 255) {
3708c2ecf20Sopenharmony_ci			res++;
3718c2ecf20Sopenharmony_ci			timeout <<= 1;
3728c2ecf20Sopenharmony_ci			if (timeout > rto_max)
3738c2ecf20Sopenharmony_ci				timeout = rto_max;
3748c2ecf20Sopenharmony_ci			period += timeout;
3758c2ecf20Sopenharmony_ci		}
3768c2ecf20Sopenharmony_ci	}
3778c2ecf20Sopenharmony_ci	return res;
3788c2ecf20Sopenharmony_ci}
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci/* Convert retransmits to seconds based on initial and max timeout */
3818c2ecf20Sopenharmony_cistatic int retrans_to_secs(u8 retrans, int timeout, int rto_max)
3828c2ecf20Sopenharmony_ci{
3838c2ecf20Sopenharmony_ci	int period = 0;
3848c2ecf20Sopenharmony_ci
3858c2ecf20Sopenharmony_ci	if (retrans > 0) {
3868c2ecf20Sopenharmony_ci		period = timeout;
3878c2ecf20Sopenharmony_ci		while (--retrans) {
3888c2ecf20Sopenharmony_ci			timeout <<= 1;
3898c2ecf20Sopenharmony_ci			if (timeout > rto_max)
3908c2ecf20Sopenharmony_ci				timeout = rto_max;
3918c2ecf20Sopenharmony_ci			period += timeout;
3928c2ecf20Sopenharmony_ci		}
3938c2ecf20Sopenharmony_ci	}
3948c2ecf20Sopenharmony_ci	return period;
3958c2ecf20Sopenharmony_ci}
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_cistatic u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
3988c2ecf20Sopenharmony_ci{
3998c2ecf20Sopenharmony_ci	u32 rate = READ_ONCE(tp->rate_delivered);
4008c2ecf20Sopenharmony_ci	u32 intv = READ_ONCE(tp->rate_interval_us);
4018c2ecf20Sopenharmony_ci	u64 rate64 = 0;
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_ci	if (rate && intv) {
4048c2ecf20Sopenharmony_ci		rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
4058c2ecf20Sopenharmony_ci		do_div(rate64, intv);
4068c2ecf20Sopenharmony_ci	}
4078c2ecf20Sopenharmony_ci	return rate64;
4088c2ecf20Sopenharmony_ci}
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_ci/* Address-family independent initialization for a tcp_sock.
4118c2ecf20Sopenharmony_ci *
4128c2ecf20Sopenharmony_ci * NOTE: A lot of things set to zero explicitly by call to
4138c2ecf20Sopenharmony_ci *       sk_alloc() so need not be done here.
4148c2ecf20Sopenharmony_ci */
4158c2ecf20Sopenharmony_civoid tcp_init_sock(struct sock *sk)
4168c2ecf20Sopenharmony_ci{
4178c2ecf20Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
4188c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
4198c2ecf20Sopenharmony_ci
4208c2ecf20Sopenharmony_ci	tp->out_of_order_queue = RB_ROOT;
4218c2ecf20Sopenharmony_ci	sk->tcp_rtx_queue = RB_ROOT;
4228c2ecf20Sopenharmony_ci	tcp_init_xmit_timers(sk);
4238c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&tp->tsq_node);
4248c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&tp->tsorted_sent_queue);
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	icsk->icsk_rto = TCP_TIMEOUT_INIT;
4278c2ecf20Sopenharmony_ci	icsk->icsk_rto_min = TCP_RTO_MIN;
4288c2ecf20Sopenharmony_ci	icsk->icsk_delack_max = TCP_DELACK_MAX;
4298c2ecf20Sopenharmony_ci	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
4308c2ecf20Sopenharmony_ci	minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci	/* So many TCP implementations out there (incorrectly) count the
4338c2ecf20Sopenharmony_ci	 * initial SYN frame in their delayed-ACK and congestion control
4348c2ecf20Sopenharmony_ci	 * algorithms that we must have the following bandaid to talk
4358c2ecf20Sopenharmony_ci	 * efficiently to them.  -DaveM
4368c2ecf20Sopenharmony_ci	 */
4378c2ecf20Sopenharmony_ci	tp->snd_cwnd = TCP_INIT_CWND;
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci	/* There's a bubble in the pipe until at least the first ACK. */
4408c2ecf20Sopenharmony_ci	tp->app_limited = ~0U;
4418c2ecf20Sopenharmony_ci	tp->rate_app_limited = 1;
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_ci	/* See draft-stevens-tcpca-spec-01 for discussion of the
4448c2ecf20Sopenharmony_ci	 * initialization of these values.
4458c2ecf20Sopenharmony_ci	 */
4468c2ecf20Sopenharmony_ci	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
4478c2ecf20Sopenharmony_ci	tp->snd_cwnd_clamp = ~0;
4488c2ecf20Sopenharmony_ci	tp->mss_cache = TCP_MSS_DEFAULT;
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_ci	tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
4518c2ecf20Sopenharmony_ci	tcp_assign_congestion_control(sk);
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci	tp->tsoffset = 0;
4548c2ecf20Sopenharmony_ci	tp->rack.reo_wnd_steps = 1;
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_ci	sk->sk_write_space = sk_stream_write_space;
4578c2ecf20Sopenharmony_ci	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci	icsk->icsk_sync_mss = tcp_sync_mss;
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci	WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
4628c2ecf20Sopenharmony_ci	WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
4638c2ecf20Sopenharmony_ci
4648c2ecf20Sopenharmony_ci	sk_sockets_allocated_inc(sk);
4658c2ecf20Sopenharmony_ci	sk->sk_route_forced_caps = NETIF_F_GSO;
4668c2ecf20Sopenharmony_ci#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
4678c2ecf20Sopenharmony_ci	icsk->nata_retries_enabled = 0;
4688c2ecf20Sopenharmony_ci	icsk->nata_retries_type = NATA_NA;
4698c2ecf20Sopenharmony_ci	icsk->nata_syn_rto = TCP_TIMEOUT_INIT;
4708c2ecf20Sopenharmony_ci	icsk->nata_data_rto = TCP_TIMEOUT_INIT;
4718c2ecf20Sopenharmony_ci	icsk->nata_data_retries = 0;
4728c2ecf20Sopenharmony_ci#endif
4738c2ecf20Sopenharmony_ci}
4748c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_init_sock);
4758c2ecf20Sopenharmony_ci
4768c2ecf20Sopenharmony_cistatic void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
4778c2ecf20Sopenharmony_ci{
4788c2ecf20Sopenharmony_ci	struct sk_buff *skb = tcp_write_queue_tail(sk);
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci	if (tsflags && skb) {
4818c2ecf20Sopenharmony_ci		struct skb_shared_info *shinfo = skb_shinfo(skb);
4828c2ecf20Sopenharmony_ci		struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci		sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
4858c2ecf20Sopenharmony_ci		if (tsflags & SOF_TIMESTAMPING_TX_ACK)
4868c2ecf20Sopenharmony_ci			tcb->txstamp_ack = 1;
4878c2ecf20Sopenharmony_ci		if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
4888c2ecf20Sopenharmony_ci			shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
4898c2ecf20Sopenharmony_ci	}
4908c2ecf20Sopenharmony_ci}
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_cistatic inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
4938c2ecf20Sopenharmony_ci					  int target, struct sock *sk)
4948c2ecf20Sopenharmony_ci{
4958c2ecf20Sopenharmony_ci	int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci	if (avail > 0) {
4988c2ecf20Sopenharmony_ci		if (avail >= target)
4998c2ecf20Sopenharmony_ci			return true;
5008c2ecf20Sopenharmony_ci		if (tcp_rmem_pressure(sk))
5018c2ecf20Sopenharmony_ci			return true;
5028c2ecf20Sopenharmony_ci		if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss)
5038c2ecf20Sopenharmony_ci			return true;
5048c2ecf20Sopenharmony_ci	}
5058c2ecf20Sopenharmony_ci	if (sk->sk_prot->stream_memory_read)
5068c2ecf20Sopenharmony_ci		return sk->sk_prot->stream_memory_read(sk);
5078c2ecf20Sopenharmony_ci	return false;
5088c2ecf20Sopenharmony_ci}
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_ci/*
5118c2ecf20Sopenharmony_ci *	Wait for a TCP event.
5128c2ecf20Sopenharmony_ci *
5138c2ecf20Sopenharmony_ci *	Note that we don't need to lock the socket, as the upper poll layers
5148c2ecf20Sopenharmony_ci *	take care of normal races (between the test and the event) and we don't
5158c2ecf20Sopenharmony_ci *	go look at any of the socket buffers directly.
5168c2ecf20Sopenharmony_ci */
5178c2ecf20Sopenharmony_ci__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
5188c2ecf20Sopenharmony_ci{
5198c2ecf20Sopenharmony_ci	__poll_t mask;
5208c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
5218c2ecf20Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
5228c2ecf20Sopenharmony_ci	u8 shutdown;
5238c2ecf20Sopenharmony_ci	int state;
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_ci	sock_poll_wait(file, sock, wait);
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci	state = inet_sk_state_load(sk);
5288c2ecf20Sopenharmony_ci	if (state == TCP_LISTEN)
5298c2ecf20Sopenharmony_ci		return inet_csk_listen_poll(sk);
5308c2ecf20Sopenharmony_ci
5318c2ecf20Sopenharmony_ci	/* Socket is not locked. We are protected from async events
5328c2ecf20Sopenharmony_ci	 * by poll logic and correct handling of state changes
5338c2ecf20Sopenharmony_ci	 * made by other threads is impossible in any case.
5348c2ecf20Sopenharmony_ci	 */
5358c2ecf20Sopenharmony_ci
5368c2ecf20Sopenharmony_ci	mask = 0;
5378c2ecf20Sopenharmony_ci
5388c2ecf20Sopenharmony_ci	/*
5398c2ecf20Sopenharmony_ci	 * EPOLLHUP is certainly not done right. But poll() doesn't
5408c2ecf20Sopenharmony_ci	 * have a notion of HUP in just one direction, and for a
5418c2ecf20Sopenharmony_ci	 * socket the read side is more interesting.
5428c2ecf20Sopenharmony_ci	 *
5438c2ecf20Sopenharmony_ci	 * Some poll() documentation says that EPOLLHUP is incompatible
5448c2ecf20Sopenharmony_ci	 * with the EPOLLOUT/POLLWR flags, so somebody should check this
5458c2ecf20Sopenharmony_ci	 * all. But careful, it tends to be safer to return too many
5468c2ecf20Sopenharmony_ci	 * bits than too few, and you can easily break real applications
5478c2ecf20Sopenharmony_ci	 * if you don't tell them that something has hung up!
5488c2ecf20Sopenharmony_ci	 *
5498c2ecf20Sopenharmony_ci	 * Check-me.
5508c2ecf20Sopenharmony_ci	 *
5518c2ecf20Sopenharmony_ci	 * Check number 1. EPOLLHUP is _UNMASKABLE_ event (see UNIX98 and
5528c2ecf20Sopenharmony_ci	 * our fs/select.c). It means that after we received EOF,
5538c2ecf20Sopenharmony_ci	 * poll always returns immediately, making impossible poll() on write()
5548c2ecf20Sopenharmony_ci	 * in state CLOSE_WAIT. One solution is evident --- to set EPOLLHUP
5558c2ecf20Sopenharmony_ci	 * if and only if shutdown has been made in both directions.
5568c2ecf20Sopenharmony_ci	 * Actually, it is interesting to look how Solaris and DUX
5578c2ecf20Sopenharmony_ci	 * solve this dilemma. I would prefer, if EPOLLHUP were maskable,
5588c2ecf20Sopenharmony_ci	 * then we could set it on SND_SHUTDOWN. BTW examples given
5598c2ecf20Sopenharmony_ci	 * in Stevens' books assume exactly this behaviour, it explains
5608c2ecf20Sopenharmony_ci	 * why EPOLLHUP is incompatible with EPOLLOUT.	--ANK
5618c2ecf20Sopenharmony_ci	 *
5628c2ecf20Sopenharmony_ci	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
5638c2ecf20Sopenharmony_ci	 * blocking on fresh not-connected or disconnected socket. --ANK
5648c2ecf20Sopenharmony_ci	 */
5658c2ecf20Sopenharmony_ci	shutdown = READ_ONCE(sk->sk_shutdown);
5668c2ecf20Sopenharmony_ci	if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
5678c2ecf20Sopenharmony_ci		mask |= EPOLLHUP;
5688c2ecf20Sopenharmony_ci	if (shutdown & RCV_SHUTDOWN)
5698c2ecf20Sopenharmony_ci		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
5708c2ecf20Sopenharmony_ci
5718c2ecf20Sopenharmony_ci	/* Connected or passive Fast Open socket? */
5728c2ecf20Sopenharmony_ci	if (state != TCP_SYN_SENT &&
5738c2ecf20Sopenharmony_ci	    (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) {
5748c2ecf20Sopenharmony_ci		int target = sock_rcvlowat(sk, 0, INT_MAX);
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_ci		if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) &&
5778c2ecf20Sopenharmony_ci		    !sock_flag(sk, SOCK_URGINLINE) &&
5788c2ecf20Sopenharmony_ci		    tp->urg_data)
5798c2ecf20Sopenharmony_ci			target++;
5808c2ecf20Sopenharmony_ci
5818c2ecf20Sopenharmony_ci		if (tcp_stream_is_readable(tp, target, sk))
5828c2ecf20Sopenharmony_ci			mask |= EPOLLIN | EPOLLRDNORM;
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_ci		if (!(shutdown & SEND_SHUTDOWN)) {
5858c2ecf20Sopenharmony_ci			if (__sk_stream_is_writeable(sk, 1)) {
5868c2ecf20Sopenharmony_ci				mask |= EPOLLOUT | EPOLLWRNORM;
5878c2ecf20Sopenharmony_ci			} else {  /* send SIGIO later */
5888c2ecf20Sopenharmony_ci				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
5898c2ecf20Sopenharmony_ci				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
5908c2ecf20Sopenharmony_ci
5918c2ecf20Sopenharmony_ci				/* Race breaker. If space is freed after
5928c2ecf20Sopenharmony_ci				 * wspace test but before the flags are set,
5938c2ecf20Sopenharmony_ci				 * IO signal will be lost. Memory barrier
5948c2ecf20Sopenharmony_ci				 * pairs with the input side.
5958c2ecf20Sopenharmony_ci				 */
5968c2ecf20Sopenharmony_ci				smp_mb__after_atomic();
5978c2ecf20Sopenharmony_ci				if (__sk_stream_is_writeable(sk, 1))
5988c2ecf20Sopenharmony_ci					mask |= EPOLLOUT | EPOLLWRNORM;
5998c2ecf20Sopenharmony_ci			}
6008c2ecf20Sopenharmony_ci		} else
6018c2ecf20Sopenharmony_ci			mask |= EPOLLOUT | EPOLLWRNORM;
6028c2ecf20Sopenharmony_ci
6038c2ecf20Sopenharmony_ci		if (tp->urg_data & TCP_URG_VALID)
6048c2ecf20Sopenharmony_ci			mask |= EPOLLPRI;
6058c2ecf20Sopenharmony_ci	} else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
6068c2ecf20Sopenharmony_ci		/* Active TCP fastopen socket with defer_connect
6078c2ecf20Sopenharmony_ci		 * Return EPOLLOUT so application can call write()
6088c2ecf20Sopenharmony_ci		 * in order for kernel to generate SYN+data
6098c2ecf20Sopenharmony_ci		 */
6108c2ecf20Sopenharmony_ci		mask |= EPOLLOUT | EPOLLWRNORM;
6118c2ecf20Sopenharmony_ci	}
6128c2ecf20Sopenharmony_ci	/* This barrier is coupled with smp_wmb() in tcp_reset() */
6138c2ecf20Sopenharmony_ci	smp_rmb();
6148c2ecf20Sopenharmony_ci	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
6158c2ecf20Sopenharmony_ci		mask |= EPOLLERR;
6168c2ecf20Sopenharmony_ci
6178c2ecf20Sopenharmony_ci	return mask;
6188c2ecf20Sopenharmony_ci}
6198c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_poll);
6208c2ecf20Sopenharmony_ci
6218c2ecf20Sopenharmony_ciint tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
6228c2ecf20Sopenharmony_ci{
6238c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
6248c2ecf20Sopenharmony_ci	int answ;
6258c2ecf20Sopenharmony_ci	bool slow;
6268c2ecf20Sopenharmony_ci
6278c2ecf20Sopenharmony_ci	switch (cmd) {
6288c2ecf20Sopenharmony_ci	case SIOCINQ:
6298c2ecf20Sopenharmony_ci		if (sk->sk_state == TCP_LISTEN)
6308c2ecf20Sopenharmony_ci			return -EINVAL;
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_ci		slow = lock_sock_fast(sk);
6338c2ecf20Sopenharmony_ci		answ = tcp_inq(sk);
6348c2ecf20Sopenharmony_ci		unlock_sock_fast(sk, slow);
6358c2ecf20Sopenharmony_ci		break;
6368c2ecf20Sopenharmony_ci	case SIOCATMARK:
6378c2ecf20Sopenharmony_ci		answ = tp->urg_data &&
6388c2ecf20Sopenharmony_ci		       READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq);
6398c2ecf20Sopenharmony_ci		break;
6408c2ecf20Sopenharmony_ci	case SIOCOUTQ:
6418c2ecf20Sopenharmony_ci		if (sk->sk_state == TCP_LISTEN)
6428c2ecf20Sopenharmony_ci			return -EINVAL;
6438c2ecf20Sopenharmony_ci
6448c2ecf20Sopenharmony_ci		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
6458c2ecf20Sopenharmony_ci			answ = 0;
6468c2ecf20Sopenharmony_ci		else
6478c2ecf20Sopenharmony_ci			answ = READ_ONCE(tp->write_seq) - tp->snd_una;
6488c2ecf20Sopenharmony_ci		break;
6498c2ecf20Sopenharmony_ci	case SIOCOUTQNSD:
6508c2ecf20Sopenharmony_ci		if (sk->sk_state == TCP_LISTEN)
6518c2ecf20Sopenharmony_ci			return -EINVAL;
6528c2ecf20Sopenharmony_ci
6538c2ecf20Sopenharmony_ci		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
6548c2ecf20Sopenharmony_ci			answ = 0;
6558c2ecf20Sopenharmony_ci		else
6568c2ecf20Sopenharmony_ci			answ = READ_ONCE(tp->write_seq) -
6578c2ecf20Sopenharmony_ci			       READ_ONCE(tp->snd_nxt);
6588c2ecf20Sopenharmony_ci		break;
6598c2ecf20Sopenharmony_ci	default:
6608c2ecf20Sopenharmony_ci		return -ENOIOCTLCMD;
6618c2ecf20Sopenharmony_ci	}
6628c2ecf20Sopenharmony_ci
6638c2ecf20Sopenharmony_ci	return put_user(answ, (int __user *)arg);
6648c2ecf20Sopenharmony_ci}
6658c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_ioctl);
6668c2ecf20Sopenharmony_ci
6678c2ecf20Sopenharmony_cistatic inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
6688c2ecf20Sopenharmony_ci{
6698c2ecf20Sopenharmony_ci	TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
6708c2ecf20Sopenharmony_ci	tp->pushed_seq = tp->write_seq;
6718c2ecf20Sopenharmony_ci}
6728c2ecf20Sopenharmony_ci
6738c2ecf20Sopenharmony_cistatic inline bool forced_push(const struct tcp_sock *tp)
6748c2ecf20Sopenharmony_ci{
6758c2ecf20Sopenharmony_ci	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
6768c2ecf20Sopenharmony_ci}
6778c2ecf20Sopenharmony_ci
6788c2ecf20Sopenharmony_cistatic void skb_entail(struct sock *sk, struct sk_buff *skb)
6798c2ecf20Sopenharmony_ci{
6808c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
6818c2ecf20Sopenharmony_ci	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
6828c2ecf20Sopenharmony_ci
6838c2ecf20Sopenharmony_ci	skb->csum    = 0;
6848c2ecf20Sopenharmony_ci	tcb->seq     = tcb->end_seq = tp->write_seq;
6858c2ecf20Sopenharmony_ci	tcb->tcp_flags = TCPHDR_ACK;
6868c2ecf20Sopenharmony_ci	tcb->sacked  = 0;
6878c2ecf20Sopenharmony_ci	__skb_header_release(skb);
6888c2ecf20Sopenharmony_ci	tcp_add_write_queue_tail(sk, skb);
6898c2ecf20Sopenharmony_ci	sk_wmem_queued_add(sk, skb->truesize);
6908c2ecf20Sopenharmony_ci	sk_mem_charge(sk, skb->truesize);
6918c2ecf20Sopenharmony_ci	if (tp->nonagle & TCP_NAGLE_PUSH)
6928c2ecf20Sopenharmony_ci		tp->nonagle &= ~TCP_NAGLE_PUSH;
6938c2ecf20Sopenharmony_ci
6948c2ecf20Sopenharmony_ci	tcp_slow_start_after_idle_check(sk);
6958c2ecf20Sopenharmony_ci}
6968c2ecf20Sopenharmony_ci
6978c2ecf20Sopenharmony_cistatic inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
6988c2ecf20Sopenharmony_ci{
6998c2ecf20Sopenharmony_ci	if (flags & MSG_OOB)
7008c2ecf20Sopenharmony_ci		tp->snd_up = tp->write_seq;
7018c2ecf20Sopenharmony_ci}
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_ci/* If a not yet filled skb is pushed, do not send it if
7048c2ecf20Sopenharmony_ci * we have data packets in Qdisc or NIC queues :
7058c2ecf20Sopenharmony_ci * Because TX completion will happen shortly, it gives a chance
7068c2ecf20Sopenharmony_ci * to coalesce future sendmsg() payload into this skb, without
7078c2ecf20Sopenharmony_ci * need for a timer, and with no latency trade off.
7088c2ecf20Sopenharmony_ci * As packets containing data payload have a bigger truesize
7098c2ecf20Sopenharmony_ci * than pure acks (dataless) packets, the last checks prevent
7108c2ecf20Sopenharmony_ci * autocorking if we only have an ACK in Qdisc/NIC queues,
7118c2ecf20Sopenharmony_ci * or if TX completion was delayed after we processed ACK packet.
7128c2ecf20Sopenharmony_ci */
7138c2ecf20Sopenharmony_cistatic bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
7148c2ecf20Sopenharmony_ci				int size_goal)
7158c2ecf20Sopenharmony_ci{
7168c2ecf20Sopenharmony_ci	return skb->len < size_goal &&
7178c2ecf20Sopenharmony_ci	       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
7188c2ecf20Sopenharmony_ci	       !tcp_rtx_queue_empty(sk) &&
7198c2ecf20Sopenharmony_ci	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
7208c2ecf20Sopenharmony_ci}
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_civoid tcp_push(struct sock *sk, int flags, int mss_now,
7238c2ecf20Sopenharmony_ci	      int nonagle, int size_goal)
7248c2ecf20Sopenharmony_ci{
7258c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
7268c2ecf20Sopenharmony_ci	struct sk_buff *skb;
7278c2ecf20Sopenharmony_ci
7288c2ecf20Sopenharmony_ci	skb = tcp_write_queue_tail(sk);
7298c2ecf20Sopenharmony_ci	if (!skb)
7308c2ecf20Sopenharmony_ci		return;
7318c2ecf20Sopenharmony_ci	if (!(flags & MSG_MORE) || forced_push(tp))
7328c2ecf20Sopenharmony_ci		tcp_mark_push(tp, skb);
7338c2ecf20Sopenharmony_ci
7348c2ecf20Sopenharmony_ci	tcp_mark_urg(tp, flags);
7358c2ecf20Sopenharmony_ci
7368c2ecf20Sopenharmony_ci	if (tcp_should_autocork(sk, skb, size_goal)) {
7378c2ecf20Sopenharmony_ci
7388c2ecf20Sopenharmony_ci		/* avoid atomic op if TSQ_THROTTLED bit is already set */
7398c2ecf20Sopenharmony_ci		if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
7408c2ecf20Sopenharmony_ci			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
7418c2ecf20Sopenharmony_ci			set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
7428c2ecf20Sopenharmony_ci			smp_mb__after_atomic();
7438c2ecf20Sopenharmony_ci		}
7448c2ecf20Sopenharmony_ci		/* It is possible TX completion already happened
7458c2ecf20Sopenharmony_ci		 * before we set TSQ_THROTTLED.
7468c2ecf20Sopenharmony_ci		 */
7478c2ecf20Sopenharmony_ci		if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize)
7488c2ecf20Sopenharmony_ci			return;
7498c2ecf20Sopenharmony_ci	}
7508c2ecf20Sopenharmony_ci
7518c2ecf20Sopenharmony_ci	if (flags & MSG_MORE)
7528c2ecf20Sopenharmony_ci		nonagle = TCP_NAGLE_CORK;
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ci	__tcp_push_pending_frames(sk, mss_now, nonagle);
7558c2ecf20Sopenharmony_ci}
7568c2ecf20Sopenharmony_ci
7578c2ecf20Sopenharmony_cistatic int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
7588c2ecf20Sopenharmony_ci				unsigned int offset, size_t len)
7598c2ecf20Sopenharmony_ci{
7608c2ecf20Sopenharmony_ci	struct tcp_splice_state *tss = rd_desc->arg.data;
7618c2ecf20Sopenharmony_ci	int ret;
7628c2ecf20Sopenharmony_ci
7638c2ecf20Sopenharmony_ci	ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
7648c2ecf20Sopenharmony_ci			      min(rd_desc->count, len), tss->flags);
7658c2ecf20Sopenharmony_ci	if (ret > 0)
7668c2ecf20Sopenharmony_ci		rd_desc->count -= ret;
7678c2ecf20Sopenharmony_ci	return ret;
7688c2ecf20Sopenharmony_ci}
7698c2ecf20Sopenharmony_ci
7708c2ecf20Sopenharmony_cistatic int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
7718c2ecf20Sopenharmony_ci{
7728c2ecf20Sopenharmony_ci	/* Store TCP splice context information in read_descriptor_t. */
7738c2ecf20Sopenharmony_ci	read_descriptor_t rd_desc = {
7748c2ecf20Sopenharmony_ci		.arg.data = tss,
7758c2ecf20Sopenharmony_ci		.count	  = tss->len,
7768c2ecf20Sopenharmony_ci	};
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
7798c2ecf20Sopenharmony_ci}
7808c2ecf20Sopenharmony_ci
7818c2ecf20Sopenharmony_ci/**
7828c2ecf20Sopenharmony_ci *  tcp_splice_read - splice data from TCP socket to a pipe
7838c2ecf20Sopenharmony_ci * @sock:	socket to splice from
7848c2ecf20Sopenharmony_ci * @ppos:	position (not valid)
7858c2ecf20Sopenharmony_ci * @pipe:	pipe to splice to
7868c2ecf20Sopenharmony_ci * @len:	number of bytes to splice
7878c2ecf20Sopenharmony_ci * @flags:	splice modifier flags
7888c2ecf20Sopenharmony_ci *
7898c2ecf20Sopenharmony_ci * Description:
7908c2ecf20Sopenharmony_ci *    Will read pages from given socket and fill them into a pipe.
7918c2ecf20Sopenharmony_ci *
7928c2ecf20Sopenharmony_ci **/
7938c2ecf20Sopenharmony_cissize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
7948c2ecf20Sopenharmony_ci			struct pipe_inode_info *pipe, size_t len,
7958c2ecf20Sopenharmony_ci			unsigned int flags)
7968c2ecf20Sopenharmony_ci{
7978c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
7988c2ecf20Sopenharmony_ci	struct tcp_splice_state tss = {
7998c2ecf20Sopenharmony_ci		.pipe = pipe,
8008c2ecf20Sopenharmony_ci		.len = len,
8018c2ecf20Sopenharmony_ci		.flags = flags,
8028c2ecf20Sopenharmony_ci	};
8038c2ecf20Sopenharmony_ci	long timeo;
8048c2ecf20Sopenharmony_ci	ssize_t spliced;
8058c2ecf20Sopenharmony_ci	int ret;
8068c2ecf20Sopenharmony_ci
8078c2ecf20Sopenharmony_ci	sock_rps_record_flow(sk);
8088c2ecf20Sopenharmony_ci	/*
8098c2ecf20Sopenharmony_ci	 * We can't seek on a socket input
8108c2ecf20Sopenharmony_ci	 */
8118c2ecf20Sopenharmony_ci	if (unlikely(*ppos))
8128c2ecf20Sopenharmony_ci		return -ESPIPE;
8138c2ecf20Sopenharmony_ci
8148c2ecf20Sopenharmony_ci	ret = spliced = 0;
8158c2ecf20Sopenharmony_ci
8168c2ecf20Sopenharmony_ci	lock_sock(sk);
8178c2ecf20Sopenharmony_ci
8188c2ecf20Sopenharmony_ci	timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
8198c2ecf20Sopenharmony_ci	while (tss.len) {
8208c2ecf20Sopenharmony_ci		ret = __tcp_splice_read(sk, &tss);
8218c2ecf20Sopenharmony_ci		if (ret < 0)
8228c2ecf20Sopenharmony_ci			break;
8238c2ecf20Sopenharmony_ci		else if (!ret) {
8248c2ecf20Sopenharmony_ci			if (spliced)
8258c2ecf20Sopenharmony_ci				break;
8268c2ecf20Sopenharmony_ci			if (sock_flag(sk, SOCK_DONE))
8278c2ecf20Sopenharmony_ci				break;
8288c2ecf20Sopenharmony_ci			if (sk->sk_err) {
8298c2ecf20Sopenharmony_ci				ret = sock_error(sk);
8308c2ecf20Sopenharmony_ci				break;
8318c2ecf20Sopenharmony_ci			}
8328c2ecf20Sopenharmony_ci			if (sk->sk_shutdown & RCV_SHUTDOWN)
8338c2ecf20Sopenharmony_ci				break;
8348c2ecf20Sopenharmony_ci			if (sk->sk_state == TCP_CLOSE) {
8358c2ecf20Sopenharmony_ci				/*
8368c2ecf20Sopenharmony_ci				 * This occurs when user tries to read
8378c2ecf20Sopenharmony_ci				 * from never connected socket.
8388c2ecf20Sopenharmony_ci				 */
8398c2ecf20Sopenharmony_ci				ret = -ENOTCONN;
8408c2ecf20Sopenharmony_ci				break;
8418c2ecf20Sopenharmony_ci			}
8428c2ecf20Sopenharmony_ci			if (!timeo) {
8438c2ecf20Sopenharmony_ci				ret = -EAGAIN;
8448c2ecf20Sopenharmony_ci				break;
8458c2ecf20Sopenharmony_ci			}
8468c2ecf20Sopenharmony_ci			/* if __tcp_splice_read() got nothing while we have
8478c2ecf20Sopenharmony_ci			 * an skb in receive queue, we do not want to loop.
8488c2ecf20Sopenharmony_ci			 * This might happen with URG data.
8498c2ecf20Sopenharmony_ci			 */
8508c2ecf20Sopenharmony_ci			if (!skb_queue_empty(&sk->sk_receive_queue))
8518c2ecf20Sopenharmony_ci				break;
8528c2ecf20Sopenharmony_ci			sk_wait_data(sk, &timeo, NULL);
8538c2ecf20Sopenharmony_ci			if (signal_pending(current)) {
8548c2ecf20Sopenharmony_ci				ret = sock_intr_errno(timeo);
8558c2ecf20Sopenharmony_ci				break;
8568c2ecf20Sopenharmony_ci			}
8578c2ecf20Sopenharmony_ci			continue;
8588c2ecf20Sopenharmony_ci		}
8598c2ecf20Sopenharmony_ci		tss.len -= ret;
8608c2ecf20Sopenharmony_ci		spliced += ret;
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci		if (!timeo)
8638c2ecf20Sopenharmony_ci			break;
8648c2ecf20Sopenharmony_ci		release_sock(sk);
8658c2ecf20Sopenharmony_ci		lock_sock(sk);
8668c2ecf20Sopenharmony_ci
8678c2ecf20Sopenharmony_ci		if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
8688c2ecf20Sopenharmony_ci		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
8698c2ecf20Sopenharmony_ci		    signal_pending(current))
8708c2ecf20Sopenharmony_ci			break;
8718c2ecf20Sopenharmony_ci	}
8728c2ecf20Sopenharmony_ci
8738c2ecf20Sopenharmony_ci	release_sock(sk);
8748c2ecf20Sopenharmony_ci
8758c2ecf20Sopenharmony_ci	if (spliced)
8768c2ecf20Sopenharmony_ci		return spliced;
8778c2ecf20Sopenharmony_ci
8788c2ecf20Sopenharmony_ci	return ret;
8798c2ecf20Sopenharmony_ci}
8808c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_splice_read);
8818c2ecf20Sopenharmony_ci
8828c2ecf20Sopenharmony_cistruct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
8838c2ecf20Sopenharmony_ci				    bool force_schedule)
8848c2ecf20Sopenharmony_ci{
8858c2ecf20Sopenharmony_ci	struct sk_buff *skb;
8868c2ecf20Sopenharmony_ci
8878c2ecf20Sopenharmony_ci	if (likely(!size)) {
8888c2ecf20Sopenharmony_ci		skb = sk->sk_tx_skb_cache;
8898c2ecf20Sopenharmony_ci		if (skb) {
8908c2ecf20Sopenharmony_ci			skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
8918c2ecf20Sopenharmony_ci			sk->sk_tx_skb_cache = NULL;
8928c2ecf20Sopenharmony_ci			pskb_trim(skb, 0);
8938c2ecf20Sopenharmony_ci			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
8948c2ecf20Sopenharmony_ci			skb_shinfo(skb)->tx_flags = 0;
8958c2ecf20Sopenharmony_ci			memset(TCP_SKB_CB(skb), 0, sizeof(struct tcp_skb_cb));
8968c2ecf20Sopenharmony_ci			return skb;
8978c2ecf20Sopenharmony_ci		}
8988c2ecf20Sopenharmony_ci	}
8998c2ecf20Sopenharmony_ci	/* The TCP header must be at least 32-bit aligned.  */
9008c2ecf20Sopenharmony_ci	size = ALIGN(size, 4);
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci	if (unlikely(tcp_under_memory_pressure(sk)))
9038c2ecf20Sopenharmony_ci		sk_mem_reclaim_partial(sk);
9048c2ecf20Sopenharmony_ci
9058c2ecf20Sopenharmony_ci	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
9068c2ecf20Sopenharmony_ci	if (likely(skb)) {
9078c2ecf20Sopenharmony_ci		bool mem_scheduled;
9088c2ecf20Sopenharmony_ci
9098c2ecf20Sopenharmony_ci		if (force_schedule) {
9108c2ecf20Sopenharmony_ci			mem_scheduled = true;
9118c2ecf20Sopenharmony_ci			sk_forced_mem_schedule(sk, skb->truesize);
9128c2ecf20Sopenharmony_ci		} else {
9138c2ecf20Sopenharmony_ci			mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
9148c2ecf20Sopenharmony_ci		}
9158c2ecf20Sopenharmony_ci		if (likely(mem_scheduled)) {
9168c2ecf20Sopenharmony_ci			skb_reserve(skb, sk->sk_prot->max_header);
9178c2ecf20Sopenharmony_ci			/*
9188c2ecf20Sopenharmony_ci			 * Make sure that we have exactly size bytes
9198c2ecf20Sopenharmony_ci			 * available to the caller, no more, no less.
9208c2ecf20Sopenharmony_ci			 */
9218c2ecf20Sopenharmony_ci			skb->reserved_tailroom = skb->end - skb->tail - size;
9228c2ecf20Sopenharmony_ci			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
9238c2ecf20Sopenharmony_ci			return skb;
9248c2ecf20Sopenharmony_ci		}
9258c2ecf20Sopenharmony_ci		__kfree_skb(skb);
9268c2ecf20Sopenharmony_ci	} else {
9278c2ecf20Sopenharmony_ci		sk->sk_prot->enter_memory_pressure(sk);
9288c2ecf20Sopenharmony_ci		sk_stream_moderate_sndbuf(sk);
9298c2ecf20Sopenharmony_ci	}
9308c2ecf20Sopenharmony_ci	return NULL;
9318c2ecf20Sopenharmony_ci}
9328c2ecf20Sopenharmony_ci
9338c2ecf20Sopenharmony_cistatic unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
9348c2ecf20Sopenharmony_ci				       int large_allowed)
9358c2ecf20Sopenharmony_ci{
9368c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
9378c2ecf20Sopenharmony_ci	u32 new_size_goal, size_goal;
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci	if (!large_allowed)
9408c2ecf20Sopenharmony_ci		return mss_now;
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_ci	/* Note : tcp_tso_autosize() will eventually split this later */
9438c2ecf20Sopenharmony_ci	new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
9448c2ecf20Sopenharmony_ci	new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
9458c2ecf20Sopenharmony_ci
9468c2ecf20Sopenharmony_ci	/* We try hard to avoid divides here */
9478c2ecf20Sopenharmony_ci	size_goal = tp->gso_segs * mss_now;
9488c2ecf20Sopenharmony_ci	if (unlikely(new_size_goal < size_goal ||
9498c2ecf20Sopenharmony_ci		     new_size_goal >= size_goal + mss_now)) {
9508c2ecf20Sopenharmony_ci		tp->gso_segs = min_t(u16, new_size_goal / mss_now,
9518c2ecf20Sopenharmony_ci				     sk->sk_gso_max_segs);
9528c2ecf20Sopenharmony_ci		size_goal = tp->gso_segs * mss_now;
9538c2ecf20Sopenharmony_ci	}
9548c2ecf20Sopenharmony_ci
9558c2ecf20Sopenharmony_ci	return max(size_goal, mss_now);
9568c2ecf20Sopenharmony_ci}
9578c2ecf20Sopenharmony_ci
9588c2ecf20Sopenharmony_ciint tcp_send_mss(struct sock *sk, int *size_goal, int flags)
9598c2ecf20Sopenharmony_ci{
9608c2ecf20Sopenharmony_ci	int mss_now;
9618c2ecf20Sopenharmony_ci
9628c2ecf20Sopenharmony_ci	mss_now = tcp_current_mss(sk);
9638c2ecf20Sopenharmony_ci	*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
9648c2ecf20Sopenharmony_ci
9658c2ecf20Sopenharmony_ci	return mss_now;
9668c2ecf20Sopenharmony_ci}
9678c2ecf20Sopenharmony_ci
9688c2ecf20Sopenharmony_ci/* In some cases, both sendpage() and sendmsg() could have added
9698c2ecf20Sopenharmony_ci * an skb to the write queue, but failed adding payload on it.
9708c2ecf20Sopenharmony_ci * We need to remove it to consume less memory, but more
9718c2ecf20Sopenharmony_ci * importantly be able to generate EPOLLOUT for Edge Trigger epoll()
9728c2ecf20Sopenharmony_ci * users.
9738c2ecf20Sopenharmony_ci */
9748c2ecf20Sopenharmony_cistatic void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
9758c2ecf20Sopenharmony_ci{
9768c2ecf20Sopenharmony_ci	if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
9778c2ecf20Sopenharmony_ci		tcp_unlink_write_queue(skb, sk);
9788c2ecf20Sopenharmony_ci		if (tcp_write_queue_empty(sk))
9798c2ecf20Sopenharmony_ci			tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
9808c2ecf20Sopenharmony_ci		sk_wmem_free_skb(sk, skb);
9818c2ecf20Sopenharmony_ci	}
9828c2ecf20Sopenharmony_ci}
9838c2ecf20Sopenharmony_ci
9848c2ecf20Sopenharmony_cissize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
9858c2ecf20Sopenharmony_ci			 size_t size, int flags)
9868c2ecf20Sopenharmony_ci{
9878c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
9888c2ecf20Sopenharmony_ci	int mss_now, size_goal;
9898c2ecf20Sopenharmony_ci	int err;
9908c2ecf20Sopenharmony_ci	ssize_t copied;
9918c2ecf20Sopenharmony_ci	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
9928c2ecf20Sopenharmony_ci
9938c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
9948c2ecf20Sopenharmony_ci	    WARN_ONCE(!sendpage_ok(page),
9958c2ecf20Sopenharmony_ci		      "page must not be a Slab one and have page_count > 0"))
9968c2ecf20Sopenharmony_ci		return -EINVAL;
9978c2ecf20Sopenharmony_ci
9988c2ecf20Sopenharmony_ci	/* Wait for a connection to finish. One exception is TCP Fast Open
9998c2ecf20Sopenharmony_ci	 * (passive side) where data is allowed to be sent before a connection
10008c2ecf20Sopenharmony_ci	 * is fully established.
10018c2ecf20Sopenharmony_ci	 */
10028c2ecf20Sopenharmony_ci	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
10038c2ecf20Sopenharmony_ci	    !tcp_passive_fastopen(sk)) {
10048c2ecf20Sopenharmony_ci		err = sk_stream_wait_connect(sk, &timeo);
10058c2ecf20Sopenharmony_ci		if (err != 0)
10068c2ecf20Sopenharmony_ci			goto out_err;
10078c2ecf20Sopenharmony_ci	}
10088c2ecf20Sopenharmony_ci
10098c2ecf20Sopenharmony_ci	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
10108c2ecf20Sopenharmony_ci
10118c2ecf20Sopenharmony_ci	mss_now = tcp_send_mss(sk, &size_goal, flags);
10128c2ecf20Sopenharmony_ci	copied = 0;
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_ci	err = -EPIPE;
10158c2ecf20Sopenharmony_ci	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
10168c2ecf20Sopenharmony_ci		goto out_err;
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci	while (size > 0) {
10198c2ecf20Sopenharmony_ci		struct sk_buff *skb = tcp_write_queue_tail(sk);
10208c2ecf20Sopenharmony_ci		int copy, i;
10218c2ecf20Sopenharmony_ci		bool can_coalesce;
10228c2ecf20Sopenharmony_ci
10238c2ecf20Sopenharmony_ci		if (!skb || (copy = size_goal - skb->len) <= 0 ||
10248c2ecf20Sopenharmony_ci		    !tcp_skb_can_collapse_to(skb)) {
10258c2ecf20Sopenharmony_cinew_segment:
10268c2ecf20Sopenharmony_ci			if (!sk_stream_memory_free(sk))
10278c2ecf20Sopenharmony_ci				goto wait_for_space;
10288c2ecf20Sopenharmony_ci
10298c2ecf20Sopenharmony_ci			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
10308c2ecf20Sopenharmony_ci					tcp_rtx_and_write_queues_empty(sk));
10318c2ecf20Sopenharmony_ci			if (!skb)
10328c2ecf20Sopenharmony_ci				goto wait_for_space;
10338c2ecf20Sopenharmony_ci
10348c2ecf20Sopenharmony_ci#ifdef CONFIG_TLS_DEVICE
10358c2ecf20Sopenharmony_ci			skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
10368c2ecf20Sopenharmony_ci#endif
10378c2ecf20Sopenharmony_ci			skb_entail(sk, skb);
10388c2ecf20Sopenharmony_ci			copy = size_goal;
10398c2ecf20Sopenharmony_ci		}
10408c2ecf20Sopenharmony_ci
10418c2ecf20Sopenharmony_ci		if (copy > size)
10428c2ecf20Sopenharmony_ci			copy = size;
10438c2ecf20Sopenharmony_ci
10448c2ecf20Sopenharmony_ci		i = skb_shinfo(skb)->nr_frags;
10458c2ecf20Sopenharmony_ci		can_coalesce = skb_can_coalesce(skb, i, page, offset);
10468c2ecf20Sopenharmony_ci		if (!can_coalesce && i >= sysctl_max_skb_frags) {
10478c2ecf20Sopenharmony_ci			tcp_mark_push(tp, skb);
10488c2ecf20Sopenharmony_ci			goto new_segment;
10498c2ecf20Sopenharmony_ci		}
10508c2ecf20Sopenharmony_ci		if (!sk_wmem_schedule(sk, copy))
10518c2ecf20Sopenharmony_ci			goto wait_for_space;
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci		if (can_coalesce) {
10548c2ecf20Sopenharmony_ci			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
10558c2ecf20Sopenharmony_ci		} else {
10568c2ecf20Sopenharmony_ci			get_page(page);
10578c2ecf20Sopenharmony_ci			skb_fill_page_desc(skb, i, page, offset, copy);
10588c2ecf20Sopenharmony_ci		}
10598c2ecf20Sopenharmony_ci
10608c2ecf20Sopenharmony_ci		if (!(flags & MSG_NO_SHARED_FRAGS))
10618c2ecf20Sopenharmony_ci			skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
10628c2ecf20Sopenharmony_ci
10638c2ecf20Sopenharmony_ci		skb->len += copy;
10648c2ecf20Sopenharmony_ci		skb->data_len += copy;
10658c2ecf20Sopenharmony_ci		skb->truesize += copy;
10668c2ecf20Sopenharmony_ci		sk_wmem_queued_add(sk, copy);
10678c2ecf20Sopenharmony_ci		sk_mem_charge(sk, copy);
10688c2ecf20Sopenharmony_ci		skb->ip_summed = CHECKSUM_PARTIAL;
10698c2ecf20Sopenharmony_ci		WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
10708c2ecf20Sopenharmony_ci		TCP_SKB_CB(skb)->end_seq += copy;
10718c2ecf20Sopenharmony_ci		tcp_skb_pcount_set(skb, 0);
10728c2ecf20Sopenharmony_ci
10738c2ecf20Sopenharmony_ci		if (!copied)
10748c2ecf20Sopenharmony_ci			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
10758c2ecf20Sopenharmony_ci
10768c2ecf20Sopenharmony_ci		copied += copy;
10778c2ecf20Sopenharmony_ci		offset += copy;
10788c2ecf20Sopenharmony_ci		size -= copy;
10798c2ecf20Sopenharmony_ci		if (!size)
10808c2ecf20Sopenharmony_ci			goto out;
10818c2ecf20Sopenharmony_ci
10828c2ecf20Sopenharmony_ci		if (skb->len < size_goal || (flags & MSG_OOB))
10838c2ecf20Sopenharmony_ci			continue;
10848c2ecf20Sopenharmony_ci
10858c2ecf20Sopenharmony_ci		if (forced_push(tp)) {
10868c2ecf20Sopenharmony_ci			tcp_mark_push(tp, skb);
10878c2ecf20Sopenharmony_ci			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
10888c2ecf20Sopenharmony_ci		} else if (skb == tcp_send_head(sk))
10898c2ecf20Sopenharmony_ci			tcp_push_one(sk, mss_now);
10908c2ecf20Sopenharmony_ci		continue;
10918c2ecf20Sopenharmony_ci
10928c2ecf20Sopenharmony_ciwait_for_space:
10938c2ecf20Sopenharmony_ci		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
10948c2ecf20Sopenharmony_ci		tcp_push(sk, flags & ~MSG_MORE, mss_now,
10958c2ecf20Sopenharmony_ci			 TCP_NAGLE_PUSH, size_goal);
10968c2ecf20Sopenharmony_ci
10978c2ecf20Sopenharmony_ci		err = sk_stream_wait_memory(sk, &timeo);
10988c2ecf20Sopenharmony_ci		if (err != 0)
10998c2ecf20Sopenharmony_ci			goto do_error;
11008c2ecf20Sopenharmony_ci
11018c2ecf20Sopenharmony_ci		mss_now = tcp_send_mss(sk, &size_goal, flags);
11028c2ecf20Sopenharmony_ci	}
11038c2ecf20Sopenharmony_ci
11048c2ecf20Sopenharmony_ciout:
11058c2ecf20Sopenharmony_ci	if (copied) {
11068c2ecf20Sopenharmony_ci		tcp_tx_timestamp(sk, sk->sk_tsflags);
11078c2ecf20Sopenharmony_ci		if (!(flags & MSG_SENDPAGE_NOTLAST))
11088c2ecf20Sopenharmony_ci			tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
11098c2ecf20Sopenharmony_ci	}
11108c2ecf20Sopenharmony_ci	return copied;
11118c2ecf20Sopenharmony_ci
11128c2ecf20Sopenharmony_cido_error:
11138c2ecf20Sopenharmony_ci	tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk));
11148c2ecf20Sopenharmony_ci	if (copied)
11158c2ecf20Sopenharmony_ci		goto out;
11168c2ecf20Sopenharmony_ciout_err:
11178c2ecf20Sopenharmony_ci	/* make sure we wake any epoll edge trigger waiter */
11188c2ecf20Sopenharmony_ci	if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
11198c2ecf20Sopenharmony_ci		sk->sk_write_space(sk);
11208c2ecf20Sopenharmony_ci		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
11218c2ecf20Sopenharmony_ci	}
11228c2ecf20Sopenharmony_ci	return sk_stream_error(sk, flags, err);
11238c2ecf20Sopenharmony_ci}
11248c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(do_tcp_sendpages);
11258c2ecf20Sopenharmony_ci
11268c2ecf20Sopenharmony_ciint tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
11278c2ecf20Sopenharmony_ci			size_t size, int flags)
11288c2ecf20Sopenharmony_ci{
11298c2ecf20Sopenharmony_ci	if (!(sk->sk_route_caps & NETIF_F_SG))
11308c2ecf20Sopenharmony_ci		return sock_no_sendpage_locked(sk, page, offset, size, flags);
11318c2ecf20Sopenharmony_ci
11328c2ecf20Sopenharmony_ci	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
11338c2ecf20Sopenharmony_ci
11348c2ecf20Sopenharmony_ci	return do_tcp_sendpages(sk, page, offset, size, flags);
11358c2ecf20Sopenharmony_ci}
11368c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_sendpage_locked);
11378c2ecf20Sopenharmony_ci
11388c2ecf20Sopenharmony_ciint tcp_sendpage(struct sock *sk, struct page *page, int offset,
11398c2ecf20Sopenharmony_ci		 size_t size, int flags)
11408c2ecf20Sopenharmony_ci{
11418c2ecf20Sopenharmony_ci	int ret;
11428c2ecf20Sopenharmony_ci
11438c2ecf20Sopenharmony_ci	lock_sock(sk);
11448c2ecf20Sopenharmony_ci	ret = tcp_sendpage_locked(sk, page, offset, size, flags);
11458c2ecf20Sopenharmony_ci	release_sock(sk);
11468c2ecf20Sopenharmony_ci
11478c2ecf20Sopenharmony_ci	return ret;
11488c2ecf20Sopenharmony_ci}
11498c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_sendpage);
11508c2ecf20Sopenharmony_ci
11518c2ecf20Sopenharmony_civoid tcp_free_fastopen_req(struct tcp_sock *tp)
11528c2ecf20Sopenharmony_ci{
11538c2ecf20Sopenharmony_ci	if (tp->fastopen_req) {
11548c2ecf20Sopenharmony_ci		kfree(tp->fastopen_req);
11558c2ecf20Sopenharmony_ci		tp->fastopen_req = NULL;
11568c2ecf20Sopenharmony_ci	}
11578c2ecf20Sopenharmony_ci}
11588c2ecf20Sopenharmony_ci
11598c2ecf20Sopenharmony_cistatic int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
11608c2ecf20Sopenharmony_ci				int *copied, size_t size,
11618c2ecf20Sopenharmony_ci				struct ubuf_info *uarg)
11628c2ecf20Sopenharmony_ci{
11638c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
11648c2ecf20Sopenharmony_ci	struct inet_sock *inet = inet_sk(sk);
11658c2ecf20Sopenharmony_ci	struct sockaddr *uaddr = msg->msg_name;
11668c2ecf20Sopenharmony_ci	int err, flags;
11678c2ecf20Sopenharmony_ci
11688c2ecf20Sopenharmony_ci	if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
11698c2ecf20Sopenharmony_ci	      TFO_CLIENT_ENABLE) ||
11708c2ecf20Sopenharmony_ci	    (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
11718c2ecf20Sopenharmony_ci	     uaddr->sa_family == AF_UNSPEC))
11728c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
11738c2ecf20Sopenharmony_ci	if (tp->fastopen_req)
11748c2ecf20Sopenharmony_ci		return -EALREADY; /* Another Fast Open is in progress */
11758c2ecf20Sopenharmony_ci
11768c2ecf20Sopenharmony_ci	tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request),
11778c2ecf20Sopenharmony_ci				   sk->sk_allocation);
11788c2ecf20Sopenharmony_ci	if (unlikely(!tp->fastopen_req))
11798c2ecf20Sopenharmony_ci		return -ENOBUFS;
11808c2ecf20Sopenharmony_ci	tp->fastopen_req->data = msg;
11818c2ecf20Sopenharmony_ci	tp->fastopen_req->size = size;
11828c2ecf20Sopenharmony_ci	tp->fastopen_req->uarg = uarg;
11838c2ecf20Sopenharmony_ci
11848c2ecf20Sopenharmony_ci	if (inet->defer_connect) {
11858c2ecf20Sopenharmony_ci		err = tcp_connect(sk);
11868c2ecf20Sopenharmony_ci		/* Same failure procedure as in tcp_v4/6_connect */
11878c2ecf20Sopenharmony_ci		if (err) {
11888c2ecf20Sopenharmony_ci			tcp_set_state(sk, TCP_CLOSE);
11898c2ecf20Sopenharmony_ci			inet->inet_dport = 0;
11908c2ecf20Sopenharmony_ci			sk->sk_route_caps = 0;
11918c2ecf20Sopenharmony_ci		}
11928c2ecf20Sopenharmony_ci	}
11938c2ecf20Sopenharmony_ci	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
11948c2ecf20Sopenharmony_ci	err = __inet_stream_connect(sk->sk_socket, uaddr,
11958c2ecf20Sopenharmony_ci				    msg->msg_namelen, flags, 1);
11968c2ecf20Sopenharmony_ci	/* fastopen_req could already be freed in __inet_stream_connect
11978c2ecf20Sopenharmony_ci	 * if the connection times out or gets rst
11988c2ecf20Sopenharmony_ci	 */
11998c2ecf20Sopenharmony_ci	if (tp->fastopen_req) {
12008c2ecf20Sopenharmony_ci		*copied = tp->fastopen_req->copied;
12018c2ecf20Sopenharmony_ci		tcp_free_fastopen_req(tp);
12028c2ecf20Sopenharmony_ci		inet->defer_connect = 0;
12038c2ecf20Sopenharmony_ci	}
12048c2ecf20Sopenharmony_ci	return err;
12058c2ecf20Sopenharmony_ci}
12068c2ecf20Sopenharmony_ci
12078c2ecf20Sopenharmony_ciint tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
12088c2ecf20Sopenharmony_ci{
12098c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
12108c2ecf20Sopenharmony_ci	struct ubuf_info *uarg = NULL;
12118c2ecf20Sopenharmony_ci	struct sk_buff *skb;
12128c2ecf20Sopenharmony_ci	struct sockcm_cookie sockc;
12138c2ecf20Sopenharmony_ci	int flags, err, copied = 0;
12148c2ecf20Sopenharmony_ci	int mss_now = 0, size_goal, copied_syn = 0;
12158c2ecf20Sopenharmony_ci	int process_backlog = 0;
12168c2ecf20Sopenharmony_ci	bool zc = false;
12178c2ecf20Sopenharmony_ci	long timeo;
12188c2ecf20Sopenharmony_ci
12198c2ecf20Sopenharmony_ci	flags = msg->msg_flags;
12208c2ecf20Sopenharmony_ci
12218c2ecf20Sopenharmony_ci	if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
12228c2ecf20Sopenharmony_ci		skb = tcp_write_queue_tail(sk);
12238c2ecf20Sopenharmony_ci		uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
12248c2ecf20Sopenharmony_ci		if (!uarg) {
12258c2ecf20Sopenharmony_ci			err = -ENOBUFS;
12268c2ecf20Sopenharmony_ci			goto out_err;
12278c2ecf20Sopenharmony_ci		}
12288c2ecf20Sopenharmony_ci
12298c2ecf20Sopenharmony_ci		zc = sk->sk_route_caps & NETIF_F_SG;
12308c2ecf20Sopenharmony_ci		if (!zc)
12318c2ecf20Sopenharmony_ci			uarg->zerocopy = 0;
12328c2ecf20Sopenharmony_ci	}
12338c2ecf20Sopenharmony_ci
12348c2ecf20Sopenharmony_ci	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
12358c2ecf20Sopenharmony_ci	    !tp->repair) {
12368c2ecf20Sopenharmony_ci		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg);
12378c2ecf20Sopenharmony_ci		if (err == -EINPROGRESS && copied_syn > 0)
12388c2ecf20Sopenharmony_ci			goto out;
12398c2ecf20Sopenharmony_ci		else if (err)
12408c2ecf20Sopenharmony_ci			goto out_err;
12418c2ecf20Sopenharmony_ci	}
12428c2ecf20Sopenharmony_ci
12438c2ecf20Sopenharmony_ci	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
12448c2ecf20Sopenharmony_ci
12458c2ecf20Sopenharmony_ci	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
12468c2ecf20Sopenharmony_ci
12478c2ecf20Sopenharmony_ci	/* Wait for a connection to finish. One exception is TCP Fast Open
12488c2ecf20Sopenharmony_ci	 * (passive side) where data is allowed to be sent before a connection
12498c2ecf20Sopenharmony_ci	 * is fully established.
12508c2ecf20Sopenharmony_ci	 */
12518c2ecf20Sopenharmony_ci	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
12528c2ecf20Sopenharmony_ci	    !tcp_passive_fastopen(sk)) {
12538c2ecf20Sopenharmony_ci		err = sk_stream_wait_connect(sk, &timeo);
12548c2ecf20Sopenharmony_ci		if (err != 0)
12558c2ecf20Sopenharmony_ci			goto do_error;
12568c2ecf20Sopenharmony_ci	}
12578c2ecf20Sopenharmony_ci
12588c2ecf20Sopenharmony_ci	if (unlikely(tp->repair)) {
12598c2ecf20Sopenharmony_ci		if (tp->repair_queue == TCP_RECV_QUEUE) {
12608c2ecf20Sopenharmony_ci			copied = tcp_send_rcvq(sk, msg, size);
12618c2ecf20Sopenharmony_ci			goto out_nopush;
12628c2ecf20Sopenharmony_ci		}
12638c2ecf20Sopenharmony_ci
12648c2ecf20Sopenharmony_ci		err = -EINVAL;
12658c2ecf20Sopenharmony_ci		if (tp->repair_queue == TCP_NO_QUEUE)
12668c2ecf20Sopenharmony_ci			goto out_err;
12678c2ecf20Sopenharmony_ci
12688c2ecf20Sopenharmony_ci		/* 'common' sending to sendq */
12698c2ecf20Sopenharmony_ci	}
12708c2ecf20Sopenharmony_ci
12718c2ecf20Sopenharmony_ci	sockcm_init(&sockc, sk);
12728c2ecf20Sopenharmony_ci	if (msg->msg_controllen) {
12738c2ecf20Sopenharmony_ci		err = sock_cmsg_send(sk, msg, &sockc);
12748c2ecf20Sopenharmony_ci		if (unlikely(err)) {
12758c2ecf20Sopenharmony_ci			err = -EINVAL;
12768c2ecf20Sopenharmony_ci			goto out_err;
12778c2ecf20Sopenharmony_ci		}
12788c2ecf20Sopenharmony_ci	}
12798c2ecf20Sopenharmony_ci
12808c2ecf20Sopenharmony_ci	/* This should be in poll */
12818c2ecf20Sopenharmony_ci	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
12828c2ecf20Sopenharmony_ci
12838c2ecf20Sopenharmony_ci	/* Ok commence sending. */
12848c2ecf20Sopenharmony_ci	copied = 0;
12858c2ecf20Sopenharmony_ci
12868c2ecf20Sopenharmony_cirestart:
12878c2ecf20Sopenharmony_ci	mss_now = tcp_send_mss(sk, &size_goal, flags);
12888c2ecf20Sopenharmony_ci
12898c2ecf20Sopenharmony_ci	err = -EPIPE;
12908c2ecf20Sopenharmony_ci	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
12918c2ecf20Sopenharmony_ci		goto do_error;
12928c2ecf20Sopenharmony_ci
12938c2ecf20Sopenharmony_ci	while (msg_data_left(msg)) {
12948c2ecf20Sopenharmony_ci		int copy = 0;
12958c2ecf20Sopenharmony_ci
12968c2ecf20Sopenharmony_ci		skb = tcp_write_queue_tail(sk);
12978c2ecf20Sopenharmony_ci		if (skb)
12988c2ecf20Sopenharmony_ci			copy = size_goal - skb->len;
12998c2ecf20Sopenharmony_ci
13008c2ecf20Sopenharmony_ci		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
13018c2ecf20Sopenharmony_ci			bool first_skb;
13028c2ecf20Sopenharmony_ci
13038c2ecf20Sopenharmony_cinew_segment:
13048c2ecf20Sopenharmony_ci			if (!sk_stream_memory_free(sk))
13058c2ecf20Sopenharmony_ci				goto wait_for_space;
13068c2ecf20Sopenharmony_ci
13078c2ecf20Sopenharmony_ci			if (unlikely(process_backlog >= 16)) {
13088c2ecf20Sopenharmony_ci				process_backlog = 0;
13098c2ecf20Sopenharmony_ci				if (sk_flush_backlog(sk))
13108c2ecf20Sopenharmony_ci					goto restart;
13118c2ecf20Sopenharmony_ci			}
13128c2ecf20Sopenharmony_ci			first_skb = tcp_rtx_and_write_queues_empty(sk);
13138c2ecf20Sopenharmony_ci			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
13148c2ecf20Sopenharmony_ci						  first_skb);
13158c2ecf20Sopenharmony_ci			if (!skb)
13168c2ecf20Sopenharmony_ci				goto wait_for_space;
13178c2ecf20Sopenharmony_ci
13188c2ecf20Sopenharmony_ci			process_backlog++;
13198c2ecf20Sopenharmony_ci			skb->ip_summed = CHECKSUM_PARTIAL;
13208c2ecf20Sopenharmony_ci
13218c2ecf20Sopenharmony_ci			skb_entail(sk, skb);
13228c2ecf20Sopenharmony_ci			copy = size_goal;
13238c2ecf20Sopenharmony_ci
13248c2ecf20Sopenharmony_ci			/* All packets are restored as if they have
13258c2ecf20Sopenharmony_ci			 * already been sent. skb_mstamp_ns isn't set to
13268c2ecf20Sopenharmony_ci			 * avoid wrong rtt estimation.
13278c2ecf20Sopenharmony_ci			 */
13288c2ecf20Sopenharmony_ci			if (tp->repair)
13298c2ecf20Sopenharmony_ci				TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
13308c2ecf20Sopenharmony_ci		}
13318c2ecf20Sopenharmony_ci
13328c2ecf20Sopenharmony_ci		/* Try to append data to the end of skb. */
13338c2ecf20Sopenharmony_ci		if (copy > msg_data_left(msg))
13348c2ecf20Sopenharmony_ci			copy = msg_data_left(msg);
13358c2ecf20Sopenharmony_ci
13368c2ecf20Sopenharmony_ci		/* Where to copy to? */
13378c2ecf20Sopenharmony_ci		if (skb_availroom(skb) > 0 && !zc) {
13388c2ecf20Sopenharmony_ci			/* We have some space in skb head. Superb! */
13398c2ecf20Sopenharmony_ci			copy = min_t(int, copy, skb_availroom(skb));
13408c2ecf20Sopenharmony_ci			err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
13418c2ecf20Sopenharmony_ci			if (err)
13428c2ecf20Sopenharmony_ci				goto do_fault;
13438c2ecf20Sopenharmony_ci		} else if (!zc) {
13448c2ecf20Sopenharmony_ci			bool merge = true;
13458c2ecf20Sopenharmony_ci			int i = skb_shinfo(skb)->nr_frags;
13468c2ecf20Sopenharmony_ci			struct page_frag *pfrag = sk_page_frag(sk);
13478c2ecf20Sopenharmony_ci
13488c2ecf20Sopenharmony_ci			if (!sk_page_frag_refill(sk, pfrag))
13498c2ecf20Sopenharmony_ci				goto wait_for_space;
13508c2ecf20Sopenharmony_ci
13518c2ecf20Sopenharmony_ci			if (!skb_can_coalesce(skb, i, pfrag->page,
13528c2ecf20Sopenharmony_ci					      pfrag->offset)) {
13538c2ecf20Sopenharmony_ci				if (i >= sysctl_max_skb_frags) {
13548c2ecf20Sopenharmony_ci					tcp_mark_push(tp, skb);
13558c2ecf20Sopenharmony_ci					goto new_segment;
13568c2ecf20Sopenharmony_ci				}
13578c2ecf20Sopenharmony_ci				merge = false;
13588c2ecf20Sopenharmony_ci			}
13598c2ecf20Sopenharmony_ci
13608c2ecf20Sopenharmony_ci			copy = min_t(int, copy, pfrag->size - pfrag->offset);
13618c2ecf20Sopenharmony_ci
13628c2ecf20Sopenharmony_ci			if (!sk_wmem_schedule(sk, copy))
13638c2ecf20Sopenharmony_ci				goto wait_for_space;
13648c2ecf20Sopenharmony_ci
13658c2ecf20Sopenharmony_ci			err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
13668c2ecf20Sopenharmony_ci						       pfrag->page,
13678c2ecf20Sopenharmony_ci						       pfrag->offset,
13688c2ecf20Sopenharmony_ci						       copy);
13698c2ecf20Sopenharmony_ci			if (err)
13708c2ecf20Sopenharmony_ci				goto do_error;
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_ci			/* Update the skb. */
13738c2ecf20Sopenharmony_ci			if (merge) {
13748c2ecf20Sopenharmony_ci				skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
13758c2ecf20Sopenharmony_ci			} else {
13768c2ecf20Sopenharmony_ci				skb_fill_page_desc(skb, i, pfrag->page,
13778c2ecf20Sopenharmony_ci						   pfrag->offset, copy);
13788c2ecf20Sopenharmony_ci				page_ref_inc(pfrag->page);
13798c2ecf20Sopenharmony_ci			}
13808c2ecf20Sopenharmony_ci			pfrag->offset += copy;
13818c2ecf20Sopenharmony_ci		} else {
13828c2ecf20Sopenharmony_ci			if (!sk_wmem_schedule(sk, copy))
13838c2ecf20Sopenharmony_ci				goto wait_for_space;
13848c2ecf20Sopenharmony_ci
13858c2ecf20Sopenharmony_ci			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
13868c2ecf20Sopenharmony_ci			if (err == -EMSGSIZE || err == -EEXIST) {
13878c2ecf20Sopenharmony_ci				tcp_mark_push(tp, skb);
13888c2ecf20Sopenharmony_ci				goto new_segment;
13898c2ecf20Sopenharmony_ci			}
13908c2ecf20Sopenharmony_ci			if (err < 0)
13918c2ecf20Sopenharmony_ci				goto do_error;
13928c2ecf20Sopenharmony_ci			copy = err;
13938c2ecf20Sopenharmony_ci		}
13948c2ecf20Sopenharmony_ci
13958c2ecf20Sopenharmony_ci		if (!copied)
13968c2ecf20Sopenharmony_ci			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
13978c2ecf20Sopenharmony_ci
13988c2ecf20Sopenharmony_ci		WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
13998c2ecf20Sopenharmony_ci		TCP_SKB_CB(skb)->end_seq += copy;
14008c2ecf20Sopenharmony_ci		tcp_skb_pcount_set(skb, 0);
14018c2ecf20Sopenharmony_ci
14028c2ecf20Sopenharmony_ci		copied += copy;
14038c2ecf20Sopenharmony_ci		if (!msg_data_left(msg)) {
14048c2ecf20Sopenharmony_ci			if (unlikely(flags & MSG_EOR))
14058c2ecf20Sopenharmony_ci				TCP_SKB_CB(skb)->eor = 1;
14068c2ecf20Sopenharmony_ci			goto out;
14078c2ecf20Sopenharmony_ci		}
14088c2ecf20Sopenharmony_ci
14098c2ecf20Sopenharmony_ci		if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair))
14108c2ecf20Sopenharmony_ci			continue;
14118c2ecf20Sopenharmony_ci
14128c2ecf20Sopenharmony_ci		if (forced_push(tp)) {
14138c2ecf20Sopenharmony_ci			tcp_mark_push(tp, skb);
14148c2ecf20Sopenharmony_ci			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
14158c2ecf20Sopenharmony_ci		} else if (skb == tcp_send_head(sk))
14168c2ecf20Sopenharmony_ci			tcp_push_one(sk, mss_now);
14178c2ecf20Sopenharmony_ci		continue;
14188c2ecf20Sopenharmony_ci
14198c2ecf20Sopenharmony_ciwait_for_space:
14208c2ecf20Sopenharmony_ci		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
14218c2ecf20Sopenharmony_ci		if (copied)
14228c2ecf20Sopenharmony_ci			tcp_push(sk, flags & ~MSG_MORE, mss_now,
14238c2ecf20Sopenharmony_ci				 TCP_NAGLE_PUSH, size_goal);
14248c2ecf20Sopenharmony_ci
14258c2ecf20Sopenharmony_ci		err = sk_stream_wait_memory(sk, &timeo);
14268c2ecf20Sopenharmony_ci		if (err != 0)
14278c2ecf20Sopenharmony_ci			goto do_error;
14288c2ecf20Sopenharmony_ci
14298c2ecf20Sopenharmony_ci		mss_now = tcp_send_mss(sk, &size_goal, flags);
14308c2ecf20Sopenharmony_ci	}
14318c2ecf20Sopenharmony_ci
14328c2ecf20Sopenharmony_ciout:
14338c2ecf20Sopenharmony_ci	if (copied) {
14348c2ecf20Sopenharmony_ci		tcp_tx_timestamp(sk, sockc.tsflags);
14358c2ecf20Sopenharmony_ci		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
14368c2ecf20Sopenharmony_ci	}
14378c2ecf20Sopenharmony_ciout_nopush:
14388c2ecf20Sopenharmony_ci	sock_zerocopy_put(uarg);
14398c2ecf20Sopenharmony_ci	return copied + copied_syn;
14408c2ecf20Sopenharmony_ci
14418c2ecf20Sopenharmony_cido_error:
14428c2ecf20Sopenharmony_ci	skb = tcp_write_queue_tail(sk);
14438c2ecf20Sopenharmony_cido_fault:
14448c2ecf20Sopenharmony_ci	tcp_remove_empty_skb(sk, skb);
14458c2ecf20Sopenharmony_ci
14468c2ecf20Sopenharmony_ci	if (copied + copied_syn)
14478c2ecf20Sopenharmony_ci		goto out;
14488c2ecf20Sopenharmony_ciout_err:
14498c2ecf20Sopenharmony_ci	sock_zerocopy_put_abort(uarg, true);
14508c2ecf20Sopenharmony_ci	err = sk_stream_error(sk, flags, err);
14518c2ecf20Sopenharmony_ci	/* make sure we wake any epoll edge trigger waiter */
14528c2ecf20Sopenharmony_ci	if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
14538c2ecf20Sopenharmony_ci		sk->sk_write_space(sk);
14548c2ecf20Sopenharmony_ci		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
14558c2ecf20Sopenharmony_ci	}
14568c2ecf20Sopenharmony_ci	return err;
14578c2ecf20Sopenharmony_ci}
14588c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_sendmsg_locked);
14598c2ecf20Sopenharmony_ci
14608c2ecf20Sopenharmony_ciint tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
14618c2ecf20Sopenharmony_ci{
14628c2ecf20Sopenharmony_ci	int ret;
14638c2ecf20Sopenharmony_ci
14648c2ecf20Sopenharmony_ci	lock_sock(sk);
14658c2ecf20Sopenharmony_ci	ret = tcp_sendmsg_locked(sk, msg, size);
14668c2ecf20Sopenharmony_ci	release_sock(sk);
14678c2ecf20Sopenharmony_ci
14688c2ecf20Sopenharmony_ci	return ret;
14698c2ecf20Sopenharmony_ci}
14708c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_sendmsg);
14718c2ecf20Sopenharmony_ci
14728c2ecf20Sopenharmony_ci/*
14738c2ecf20Sopenharmony_ci *	Handle reading urgent data. BSD has very simple semantics for
14748c2ecf20Sopenharmony_ci *	this, no blocking and very strange errors 8)
14758c2ecf20Sopenharmony_ci */
14768c2ecf20Sopenharmony_ci
14778c2ecf20Sopenharmony_cistatic int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
14788c2ecf20Sopenharmony_ci{
14798c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
14808c2ecf20Sopenharmony_ci
14818c2ecf20Sopenharmony_ci	/* No URG data to read. */
14828c2ecf20Sopenharmony_ci	if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
14838c2ecf20Sopenharmony_ci	    tp->urg_data == TCP_URG_READ)
14848c2ecf20Sopenharmony_ci		return -EINVAL;	/* Yes this is right ! */
14858c2ecf20Sopenharmony_ci
14868c2ecf20Sopenharmony_ci	if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
14878c2ecf20Sopenharmony_ci		return -ENOTCONN;
14888c2ecf20Sopenharmony_ci
14898c2ecf20Sopenharmony_ci	if (tp->urg_data & TCP_URG_VALID) {
14908c2ecf20Sopenharmony_ci		int err = 0;
14918c2ecf20Sopenharmony_ci		char c = tp->urg_data;
14928c2ecf20Sopenharmony_ci
14938c2ecf20Sopenharmony_ci		if (!(flags & MSG_PEEK))
14948c2ecf20Sopenharmony_ci			tp->urg_data = TCP_URG_READ;
14958c2ecf20Sopenharmony_ci
14968c2ecf20Sopenharmony_ci		/* Read urgent data. */
14978c2ecf20Sopenharmony_ci		msg->msg_flags |= MSG_OOB;
14988c2ecf20Sopenharmony_ci
14998c2ecf20Sopenharmony_ci		if (len > 0) {
15008c2ecf20Sopenharmony_ci			if (!(flags & MSG_TRUNC))
15018c2ecf20Sopenharmony_ci				err = memcpy_to_msg(msg, &c, 1);
15028c2ecf20Sopenharmony_ci			len = 1;
15038c2ecf20Sopenharmony_ci		} else
15048c2ecf20Sopenharmony_ci			msg->msg_flags |= MSG_TRUNC;
15058c2ecf20Sopenharmony_ci
15068c2ecf20Sopenharmony_ci		return err ? -EFAULT : len;
15078c2ecf20Sopenharmony_ci	}
15088c2ecf20Sopenharmony_ci
15098c2ecf20Sopenharmony_ci	if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
15108c2ecf20Sopenharmony_ci		return 0;
15118c2ecf20Sopenharmony_ci
15128c2ecf20Sopenharmony_ci	/* Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
15138c2ecf20Sopenharmony_ci	 * the available implementations agree in this case:
15148c2ecf20Sopenharmony_ci	 * this call should never block, independent of the
15158c2ecf20Sopenharmony_ci	 * blocking state of the socket.
15168c2ecf20Sopenharmony_ci	 * Mike <pall@rz.uni-karlsruhe.de>
15178c2ecf20Sopenharmony_ci	 */
15188c2ecf20Sopenharmony_ci	return -EAGAIN;
15198c2ecf20Sopenharmony_ci}
15208c2ecf20Sopenharmony_ci
15218c2ecf20Sopenharmony_cistatic int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
15228c2ecf20Sopenharmony_ci{
15238c2ecf20Sopenharmony_ci	struct sk_buff *skb;
15248c2ecf20Sopenharmony_ci	int copied = 0, err = 0;
15258c2ecf20Sopenharmony_ci
15268c2ecf20Sopenharmony_ci	/* XXX -- need to support SO_PEEK_OFF */
15278c2ecf20Sopenharmony_ci
15288c2ecf20Sopenharmony_ci	skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
15298c2ecf20Sopenharmony_ci		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
15308c2ecf20Sopenharmony_ci		if (err)
15318c2ecf20Sopenharmony_ci			return err;
15328c2ecf20Sopenharmony_ci		copied += skb->len;
15338c2ecf20Sopenharmony_ci	}
15348c2ecf20Sopenharmony_ci
15358c2ecf20Sopenharmony_ci	skb_queue_walk(&sk->sk_write_queue, skb) {
15368c2ecf20Sopenharmony_ci		err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
15378c2ecf20Sopenharmony_ci		if (err)
15388c2ecf20Sopenharmony_ci			break;
15398c2ecf20Sopenharmony_ci
15408c2ecf20Sopenharmony_ci		copied += skb->len;
15418c2ecf20Sopenharmony_ci	}
15428c2ecf20Sopenharmony_ci
15438c2ecf20Sopenharmony_ci	return err ?: copied;
15448c2ecf20Sopenharmony_ci}
15458c2ecf20Sopenharmony_ci
15468c2ecf20Sopenharmony_ci/* Clean up the receive buffer for full frames taken by the user,
15478c2ecf20Sopenharmony_ci * then send an ACK if necessary.  COPIED is the number of bytes
15488c2ecf20Sopenharmony_ci * tcp_recvmsg has given to the user so far, it speeds up the
15498c2ecf20Sopenharmony_ci * calculation of whether or not we must ACK for the sake of
15508c2ecf20Sopenharmony_ci * a window update.
15518c2ecf20Sopenharmony_ci */
15528c2ecf20Sopenharmony_civoid tcp_cleanup_rbuf(struct sock *sk, int copied)
15538c2ecf20Sopenharmony_ci{
15548c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
15558c2ecf20Sopenharmony_ci	bool time_to_ack = false;
15568c2ecf20Sopenharmony_ci
15578c2ecf20Sopenharmony_ci	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
15588c2ecf20Sopenharmony_ci
15598c2ecf20Sopenharmony_ci	WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
15608c2ecf20Sopenharmony_ci	     "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
15618c2ecf20Sopenharmony_ci	     tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
15628c2ecf20Sopenharmony_ci
15638c2ecf20Sopenharmony_ci	if (inet_csk_ack_scheduled(sk)) {
15648c2ecf20Sopenharmony_ci		const struct inet_connection_sock *icsk = inet_csk(sk);
15658c2ecf20Sopenharmony_ci		__u16 rcv_mss = icsk->icsk_ack.rcv_mss;
15668c2ecf20Sopenharmony_ci#ifdef CONFIG_LOWPOWER_PROTOCOL
15678c2ecf20Sopenharmony_ci		rcv_mss *= tcp_ack_num(sk);
15688c2ecf20Sopenharmony_ci#endif /* CONFIG_LOWPOWER_PROTOCOL */
15698c2ecf20Sopenharmony_ci
15708c2ecf20Sopenharmony_ci		if (/* Once-per-two-segments ACK was not sent by tcp_input.c */
15718c2ecf20Sopenharmony_ci		    tp->rcv_nxt - tp->rcv_wup > rcv_mss ||
15728c2ecf20Sopenharmony_ci		    /*
15738c2ecf20Sopenharmony_ci		     * If this read emptied read buffer, we send ACK, if
15748c2ecf20Sopenharmony_ci		     * connection is not bidirectional, user drained
15758c2ecf20Sopenharmony_ci		     * receive buffer and there was a small segment
15768c2ecf20Sopenharmony_ci		     * in queue.
15778c2ecf20Sopenharmony_ci		     */
15788c2ecf20Sopenharmony_ci		    (copied > 0 &&
15798c2ecf20Sopenharmony_ci		     ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
15808c2ecf20Sopenharmony_ci		      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
15818c2ecf20Sopenharmony_ci		       !inet_csk_in_pingpong_mode(sk))) &&
15828c2ecf20Sopenharmony_ci		      !atomic_read(&sk->sk_rmem_alloc)))
15838c2ecf20Sopenharmony_ci			time_to_ack = true;
15848c2ecf20Sopenharmony_ci	}
15858c2ecf20Sopenharmony_ci
15868c2ecf20Sopenharmony_ci	/* We send an ACK if we can now advertise a non-zero window
15878c2ecf20Sopenharmony_ci	 * which has been raised "significantly".
15888c2ecf20Sopenharmony_ci	 *
15898c2ecf20Sopenharmony_ci	 * Even if window raised up to infinity, do not send window open ACK
15908c2ecf20Sopenharmony_ci	 * in states, where we will not receive more. It is useless.
15918c2ecf20Sopenharmony_ci	 */
15928c2ecf20Sopenharmony_ci	if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
15938c2ecf20Sopenharmony_ci		__u32 rcv_window_now = tcp_receive_window(tp);
15948c2ecf20Sopenharmony_ci
15958c2ecf20Sopenharmony_ci		/* Optimize, __tcp_select_window() is not cheap. */
15968c2ecf20Sopenharmony_ci		if (2*rcv_window_now <= tp->window_clamp) {
15978c2ecf20Sopenharmony_ci			__u32 new_window = __tcp_select_window(sk);
15988c2ecf20Sopenharmony_ci
15998c2ecf20Sopenharmony_ci			/* Send ACK now, if this read freed lots of space
16008c2ecf20Sopenharmony_ci			 * in our buffer. Certainly, new_window is new window.
16018c2ecf20Sopenharmony_ci			 * We can advertise it now, if it is not less than current one.
16028c2ecf20Sopenharmony_ci			 * "Lots" means "at least twice" here.
16038c2ecf20Sopenharmony_ci			 */
16048c2ecf20Sopenharmony_ci			if (new_window && new_window >= 2 * rcv_window_now)
16058c2ecf20Sopenharmony_ci				time_to_ack = true;
16068c2ecf20Sopenharmony_ci		}
16078c2ecf20Sopenharmony_ci	}
16088c2ecf20Sopenharmony_ci	if (time_to_ack)
16098c2ecf20Sopenharmony_ci		tcp_send_ack(sk);
16108c2ecf20Sopenharmony_ci}
16118c2ecf20Sopenharmony_ci
16128c2ecf20Sopenharmony_cistatic struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
16138c2ecf20Sopenharmony_ci{
16148c2ecf20Sopenharmony_ci	struct sk_buff *skb;
16158c2ecf20Sopenharmony_ci	u32 offset;
16168c2ecf20Sopenharmony_ci
16178c2ecf20Sopenharmony_ci	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
16188c2ecf20Sopenharmony_ci		offset = seq - TCP_SKB_CB(skb)->seq;
16198c2ecf20Sopenharmony_ci		if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
16208c2ecf20Sopenharmony_ci			pr_err_once("%s: found a SYN, please report !\n", __func__);
16218c2ecf20Sopenharmony_ci			offset--;
16228c2ecf20Sopenharmony_ci		}
16238c2ecf20Sopenharmony_ci		if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
16248c2ecf20Sopenharmony_ci			*off = offset;
16258c2ecf20Sopenharmony_ci			return skb;
16268c2ecf20Sopenharmony_ci		}
16278c2ecf20Sopenharmony_ci		/* This looks weird, but this can happen if TCP collapsing
16288c2ecf20Sopenharmony_ci		 * splitted a fat GRO packet, while we released socket lock
16298c2ecf20Sopenharmony_ci		 * in skb_splice_bits()
16308c2ecf20Sopenharmony_ci		 */
16318c2ecf20Sopenharmony_ci		sk_eat_skb(sk, skb);
16328c2ecf20Sopenharmony_ci	}
16338c2ecf20Sopenharmony_ci	return NULL;
16348c2ecf20Sopenharmony_ci}
16358c2ecf20Sopenharmony_ci
16368c2ecf20Sopenharmony_ci/*
16378c2ecf20Sopenharmony_ci * This routine provides an alternative to tcp_recvmsg() for routines
16388c2ecf20Sopenharmony_ci * that would like to handle copying from skbuffs directly in 'sendfile'
16398c2ecf20Sopenharmony_ci * fashion.
16408c2ecf20Sopenharmony_ci * Note:
16418c2ecf20Sopenharmony_ci *	- It is assumed that the socket was locked by the caller.
16428c2ecf20Sopenharmony_ci *	- The routine does not block.
16438c2ecf20Sopenharmony_ci *	- At present, there is no support for reading OOB data
16448c2ecf20Sopenharmony_ci *	  or for 'peeking' the socket using this routine
16458c2ecf20Sopenharmony_ci *	  (although both would be easy to implement).
16468c2ecf20Sopenharmony_ci */
16478c2ecf20Sopenharmony_ciint tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
16488c2ecf20Sopenharmony_ci		  sk_read_actor_t recv_actor)
16498c2ecf20Sopenharmony_ci{
16508c2ecf20Sopenharmony_ci	struct sk_buff *skb;
16518c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
16528c2ecf20Sopenharmony_ci	u32 seq = tp->copied_seq;
16538c2ecf20Sopenharmony_ci	u32 offset;
16548c2ecf20Sopenharmony_ci	int copied = 0;
16558c2ecf20Sopenharmony_ci
16568c2ecf20Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN)
16578c2ecf20Sopenharmony_ci		return -ENOTCONN;
16588c2ecf20Sopenharmony_ci	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
16598c2ecf20Sopenharmony_ci		if (offset < skb->len) {
16608c2ecf20Sopenharmony_ci			int used;
16618c2ecf20Sopenharmony_ci			size_t len;
16628c2ecf20Sopenharmony_ci
16638c2ecf20Sopenharmony_ci			len = skb->len - offset;
16648c2ecf20Sopenharmony_ci			/* Stop reading if we hit a patch of urgent data */
16658c2ecf20Sopenharmony_ci			if (tp->urg_data) {
16668c2ecf20Sopenharmony_ci				u32 urg_offset = tp->urg_seq - seq;
16678c2ecf20Sopenharmony_ci				if (urg_offset < len)
16688c2ecf20Sopenharmony_ci					len = urg_offset;
16698c2ecf20Sopenharmony_ci				if (!len)
16708c2ecf20Sopenharmony_ci					break;
16718c2ecf20Sopenharmony_ci			}
16728c2ecf20Sopenharmony_ci			used = recv_actor(desc, skb, offset, len);
16738c2ecf20Sopenharmony_ci			if (used <= 0) {
16748c2ecf20Sopenharmony_ci				if (!copied)
16758c2ecf20Sopenharmony_ci					copied = used;
16768c2ecf20Sopenharmony_ci				break;
16778c2ecf20Sopenharmony_ci			}
16788c2ecf20Sopenharmony_ci			if (WARN_ON_ONCE(used > len))
16798c2ecf20Sopenharmony_ci				used = len;
16808c2ecf20Sopenharmony_ci			seq += used;
16818c2ecf20Sopenharmony_ci			copied += used;
16828c2ecf20Sopenharmony_ci			offset += used;
16838c2ecf20Sopenharmony_ci
16848c2ecf20Sopenharmony_ci			/* If recv_actor drops the lock (e.g. TCP splice
16858c2ecf20Sopenharmony_ci			 * receive) the skb pointer might be invalid when
16868c2ecf20Sopenharmony_ci			 * getting here: tcp_collapse might have deleted it
16878c2ecf20Sopenharmony_ci			 * while aggregating skbs from the socket queue.
16888c2ecf20Sopenharmony_ci			 */
16898c2ecf20Sopenharmony_ci			skb = tcp_recv_skb(sk, seq - 1, &offset);
16908c2ecf20Sopenharmony_ci			if (!skb)
16918c2ecf20Sopenharmony_ci				break;
16928c2ecf20Sopenharmony_ci			/* TCP coalescing might have appended data to the skb.
16938c2ecf20Sopenharmony_ci			 * Try to splice more frags
16948c2ecf20Sopenharmony_ci			 */
16958c2ecf20Sopenharmony_ci			if (offset + 1 != skb->len)
16968c2ecf20Sopenharmony_ci				continue;
16978c2ecf20Sopenharmony_ci		}
16988c2ecf20Sopenharmony_ci		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
16998c2ecf20Sopenharmony_ci			sk_eat_skb(sk, skb);
17008c2ecf20Sopenharmony_ci			++seq;
17018c2ecf20Sopenharmony_ci			break;
17028c2ecf20Sopenharmony_ci		}
17038c2ecf20Sopenharmony_ci		sk_eat_skb(sk, skb);
17048c2ecf20Sopenharmony_ci		if (!desc->count)
17058c2ecf20Sopenharmony_ci			break;
17068c2ecf20Sopenharmony_ci		WRITE_ONCE(tp->copied_seq, seq);
17078c2ecf20Sopenharmony_ci	}
17088c2ecf20Sopenharmony_ci	WRITE_ONCE(tp->copied_seq, seq);
17098c2ecf20Sopenharmony_ci
17108c2ecf20Sopenharmony_ci	tcp_rcv_space_adjust(sk);
17118c2ecf20Sopenharmony_ci
17128c2ecf20Sopenharmony_ci	/* Clean up data we have read: This will do ACK frames. */
17138c2ecf20Sopenharmony_ci	if (copied > 0) {
17148c2ecf20Sopenharmony_ci		tcp_recv_skb(sk, seq, &offset);
17158c2ecf20Sopenharmony_ci		tcp_cleanup_rbuf(sk, copied);
17168c2ecf20Sopenharmony_ci	}
17178c2ecf20Sopenharmony_ci	return copied;
17188c2ecf20Sopenharmony_ci}
17198c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_read_sock);
17208c2ecf20Sopenharmony_ci
17218c2ecf20Sopenharmony_ciint tcp_peek_len(struct socket *sock)
17228c2ecf20Sopenharmony_ci{
17238c2ecf20Sopenharmony_ci	return tcp_inq(sock->sk);
17248c2ecf20Sopenharmony_ci}
17258c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_peek_len);
17268c2ecf20Sopenharmony_ci
17278c2ecf20Sopenharmony_ci/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
17288c2ecf20Sopenharmony_ciint tcp_set_rcvlowat(struct sock *sk, int val)
17298c2ecf20Sopenharmony_ci{
17308c2ecf20Sopenharmony_ci	int cap;
17318c2ecf20Sopenharmony_ci
17328c2ecf20Sopenharmony_ci	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
17338c2ecf20Sopenharmony_ci		cap = sk->sk_rcvbuf >> 1;
17348c2ecf20Sopenharmony_ci	else
17358c2ecf20Sopenharmony_ci		cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
17368c2ecf20Sopenharmony_ci	val = min(val, cap);
17378c2ecf20Sopenharmony_ci	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
17388c2ecf20Sopenharmony_ci
17398c2ecf20Sopenharmony_ci	/* Check if we need to signal EPOLLIN right now */
17408c2ecf20Sopenharmony_ci	tcp_data_ready(sk);
17418c2ecf20Sopenharmony_ci
17428c2ecf20Sopenharmony_ci	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
17438c2ecf20Sopenharmony_ci		return 0;
17448c2ecf20Sopenharmony_ci
17458c2ecf20Sopenharmony_ci	val <<= 1;
17468c2ecf20Sopenharmony_ci	if (val > sk->sk_rcvbuf) {
17478c2ecf20Sopenharmony_ci		WRITE_ONCE(sk->sk_rcvbuf, val);
17488c2ecf20Sopenharmony_ci		tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
17498c2ecf20Sopenharmony_ci	}
17508c2ecf20Sopenharmony_ci	return 0;
17518c2ecf20Sopenharmony_ci}
17528c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_set_rcvlowat);
17538c2ecf20Sopenharmony_ci
17548c2ecf20Sopenharmony_ci#ifdef CONFIG_MMU
17558c2ecf20Sopenharmony_cistatic const struct vm_operations_struct tcp_vm_ops = {
17568c2ecf20Sopenharmony_ci};
17578c2ecf20Sopenharmony_ci
17588c2ecf20Sopenharmony_ciint tcp_mmap(struct file *file, struct socket *sock,
17598c2ecf20Sopenharmony_ci	     struct vm_area_struct *vma)
17608c2ecf20Sopenharmony_ci{
17618c2ecf20Sopenharmony_ci	if (vma->vm_flags & (VM_WRITE | VM_EXEC))
17628c2ecf20Sopenharmony_ci		return -EPERM;
17638c2ecf20Sopenharmony_ci	vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
17648c2ecf20Sopenharmony_ci
17658c2ecf20Sopenharmony_ci	/* Instruct vm_insert_page() to not mmap_read_lock(mm) */
17668c2ecf20Sopenharmony_ci	vma->vm_flags |= VM_MIXEDMAP;
17678c2ecf20Sopenharmony_ci
17688c2ecf20Sopenharmony_ci	vma->vm_ops = &tcp_vm_ops;
17698c2ecf20Sopenharmony_ci	return 0;
17708c2ecf20Sopenharmony_ci}
17718c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_mmap);
17728c2ecf20Sopenharmony_ci
17738c2ecf20Sopenharmony_cistatic skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
17748c2ecf20Sopenharmony_ci				       u32 *offset_frag)
17758c2ecf20Sopenharmony_ci{
17768c2ecf20Sopenharmony_ci	skb_frag_t *frag;
17778c2ecf20Sopenharmony_ci
17788c2ecf20Sopenharmony_ci	if (unlikely(offset_skb >= skb->len))
17798c2ecf20Sopenharmony_ci		return NULL;
17808c2ecf20Sopenharmony_ci
17818c2ecf20Sopenharmony_ci	offset_skb -= skb_headlen(skb);
17828c2ecf20Sopenharmony_ci	if ((int)offset_skb < 0 || skb_has_frag_list(skb))
17838c2ecf20Sopenharmony_ci		return NULL;
17848c2ecf20Sopenharmony_ci
17858c2ecf20Sopenharmony_ci	frag = skb_shinfo(skb)->frags;
17868c2ecf20Sopenharmony_ci	while (offset_skb) {
17878c2ecf20Sopenharmony_ci		if (skb_frag_size(frag) > offset_skb) {
17888c2ecf20Sopenharmony_ci			*offset_frag = offset_skb;
17898c2ecf20Sopenharmony_ci			return frag;
17908c2ecf20Sopenharmony_ci		}
17918c2ecf20Sopenharmony_ci		offset_skb -= skb_frag_size(frag);
17928c2ecf20Sopenharmony_ci		++frag;
17938c2ecf20Sopenharmony_ci	}
17948c2ecf20Sopenharmony_ci	*offset_frag = 0;
17958c2ecf20Sopenharmony_ci	return frag;
17968c2ecf20Sopenharmony_ci}
17978c2ecf20Sopenharmony_ci
17988c2ecf20Sopenharmony_cistatic bool can_map_frag(const skb_frag_t *frag)
17998c2ecf20Sopenharmony_ci{
18008c2ecf20Sopenharmony_ci	struct page *page;
18018c2ecf20Sopenharmony_ci
18028c2ecf20Sopenharmony_ci	if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag))
18038c2ecf20Sopenharmony_ci		return false;
18048c2ecf20Sopenharmony_ci
18058c2ecf20Sopenharmony_ci	page = skb_frag_page(frag);
18068c2ecf20Sopenharmony_ci
18078c2ecf20Sopenharmony_ci	if (PageCompound(page) || page->mapping)
18088c2ecf20Sopenharmony_ci		return false;
18098c2ecf20Sopenharmony_ci
18108c2ecf20Sopenharmony_ci	return true;
18118c2ecf20Sopenharmony_ci}
18128c2ecf20Sopenharmony_ci
18138c2ecf20Sopenharmony_cistatic int find_next_mappable_frag(const skb_frag_t *frag,
18148c2ecf20Sopenharmony_ci				   int remaining_in_skb)
18158c2ecf20Sopenharmony_ci{
18168c2ecf20Sopenharmony_ci	int offset = 0;
18178c2ecf20Sopenharmony_ci
18188c2ecf20Sopenharmony_ci	if (likely(can_map_frag(frag)))
18198c2ecf20Sopenharmony_ci		return 0;
18208c2ecf20Sopenharmony_ci
18218c2ecf20Sopenharmony_ci	while (offset < remaining_in_skb && !can_map_frag(frag)) {
18228c2ecf20Sopenharmony_ci		offset += skb_frag_size(frag);
18238c2ecf20Sopenharmony_ci		++frag;
18248c2ecf20Sopenharmony_ci	}
18258c2ecf20Sopenharmony_ci	return offset;
18268c2ecf20Sopenharmony_ci}
18278c2ecf20Sopenharmony_ci
18288c2ecf20Sopenharmony_cistatic int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
18298c2ecf20Sopenharmony_ci				   struct sk_buff *skb, u32 copylen,
18308c2ecf20Sopenharmony_ci				   u32 *offset, u32 *seq)
18318c2ecf20Sopenharmony_ci{
18328c2ecf20Sopenharmony_ci	unsigned long copy_address = (unsigned long)zc->copybuf_address;
18338c2ecf20Sopenharmony_ci	struct msghdr msg = {};
18348c2ecf20Sopenharmony_ci	struct iovec iov;
18358c2ecf20Sopenharmony_ci	int err;
18368c2ecf20Sopenharmony_ci
18378c2ecf20Sopenharmony_ci	if (copy_address != zc->copybuf_address)
18388c2ecf20Sopenharmony_ci		return -EINVAL;
18398c2ecf20Sopenharmony_ci
18408c2ecf20Sopenharmony_ci	err = import_single_range(READ, (void __user *)copy_address,
18418c2ecf20Sopenharmony_ci				  copylen, &iov, &msg.msg_iter);
18428c2ecf20Sopenharmony_ci	if (err)
18438c2ecf20Sopenharmony_ci		return err;
18448c2ecf20Sopenharmony_ci	err = skb_copy_datagram_msg(skb, *offset, &msg, copylen);
18458c2ecf20Sopenharmony_ci	if (err)
18468c2ecf20Sopenharmony_ci		return err;
18478c2ecf20Sopenharmony_ci	zc->recv_skip_hint -= copylen;
18488c2ecf20Sopenharmony_ci	*offset += copylen;
18498c2ecf20Sopenharmony_ci	*seq += copylen;
18508c2ecf20Sopenharmony_ci	return (__s32)copylen;
18518c2ecf20Sopenharmony_ci}
18528c2ecf20Sopenharmony_ci
18538c2ecf20Sopenharmony_cistatic int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc,
18548c2ecf20Sopenharmony_ci					     struct sock *sk,
18558c2ecf20Sopenharmony_ci					     struct sk_buff *skb,
18568c2ecf20Sopenharmony_ci					     u32 *seq,
18578c2ecf20Sopenharmony_ci					     s32 copybuf_len)
18588c2ecf20Sopenharmony_ci{
18598c2ecf20Sopenharmony_ci	u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint);
18608c2ecf20Sopenharmony_ci
18618c2ecf20Sopenharmony_ci	if (!copylen)
18628c2ecf20Sopenharmony_ci		return 0;
18638c2ecf20Sopenharmony_ci	/* skb is null if inq < PAGE_SIZE. */
18648c2ecf20Sopenharmony_ci	if (skb)
18658c2ecf20Sopenharmony_ci		offset = *seq - TCP_SKB_CB(skb)->seq;
18668c2ecf20Sopenharmony_ci	else
18678c2ecf20Sopenharmony_ci		skb = tcp_recv_skb(sk, *seq, &offset);
18688c2ecf20Sopenharmony_ci
18698c2ecf20Sopenharmony_ci	zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset,
18708c2ecf20Sopenharmony_ci						  seq);
18718c2ecf20Sopenharmony_ci	return zc->copybuf_len < 0 ? 0 : copylen;
18728c2ecf20Sopenharmony_ci}
18738c2ecf20Sopenharmony_ci
18748c2ecf20Sopenharmony_cistatic int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
18758c2ecf20Sopenharmony_ci					struct page **pages,
18768c2ecf20Sopenharmony_ci					unsigned long pages_to_map,
18778c2ecf20Sopenharmony_ci					unsigned long *insert_addr,
18788c2ecf20Sopenharmony_ci					u32 *length_with_pending,
18798c2ecf20Sopenharmony_ci					u32 *seq,
18808c2ecf20Sopenharmony_ci					struct tcp_zerocopy_receive *zc)
18818c2ecf20Sopenharmony_ci{
18828c2ecf20Sopenharmony_ci	unsigned long pages_remaining = pages_to_map;
18838c2ecf20Sopenharmony_ci	int bytes_mapped;
18848c2ecf20Sopenharmony_ci	int ret;
18858c2ecf20Sopenharmony_ci
18868c2ecf20Sopenharmony_ci	ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining);
18878c2ecf20Sopenharmony_ci	bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining);
18888c2ecf20Sopenharmony_ci	/* Even if vm_insert_pages fails, it may have partially succeeded in
18898c2ecf20Sopenharmony_ci	 * mapping (some but not all of the pages).
18908c2ecf20Sopenharmony_ci	 */
18918c2ecf20Sopenharmony_ci	*seq += bytes_mapped;
18928c2ecf20Sopenharmony_ci	*insert_addr += bytes_mapped;
18938c2ecf20Sopenharmony_ci	if (ret) {
18948c2ecf20Sopenharmony_ci		/* But if vm_insert_pages did fail, we have to unroll some state
18958c2ecf20Sopenharmony_ci		 * we speculatively touched before.
18968c2ecf20Sopenharmony_ci		 */
18978c2ecf20Sopenharmony_ci		const int bytes_not_mapped = PAGE_SIZE * pages_remaining;
18988c2ecf20Sopenharmony_ci		*length_with_pending -= bytes_not_mapped;
18998c2ecf20Sopenharmony_ci		zc->recv_skip_hint += bytes_not_mapped;
19008c2ecf20Sopenharmony_ci	}
19018c2ecf20Sopenharmony_ci	return ret;
19028c2ecf20Sopenharmony_ci}
19038c2ecf20Sopenharmony_ci
19048c2ecf20Sopenharmony_cistatic int tcp_zerocopy_receive(struct sock *sk,
19058c2ecf20Sopenharmony_ci				struct tcp_zerocopy_receive *zc)
19068c2ecf20Sopenharmony_ci{
19078c2ecf20Sopenharmony_ci	u32 length = 0, offset, vma_len, avail_len, aligned_len, copylen = 0;
19088c2ecf20Sopenharmony_ci	unsigned long address = (unsigned long)zc->address;
19098c2ecf20Sopenharmony_ci	s32 copybuf_len = zc->copybuf_len;
19108c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
19118c2ecf20Sopenharmony_ci	#define PAGE_BATCH_SIZE 8
19128c2ecf20Sopenharmony_ci	struct page *pages[PAGE_BATCH_SIZE];
19138c2ecf20Sopenharmony_ci	const skb_frag_t *frags = NULL;
19148c2ecf20Sopenharmony_ci	struct vm_area_struct *vma;
19158c2ecf20Sopenharmony_ci	struct sk_buff *skb = NULL;
19168c2ecf20Sopenharmony_ci	unsigned long pg_idx = 0;
19178c2ecf20Sopenharmony_ci	unsigned long curr_addr;
19188c2ecf20Sopenharmony_ci	u32 seq = tp->copied_seq;
19198c2ecf20Sopenharmony_ci	int inq = tcp_inq(sk);
19208c2ecf20Sopenharmony_ci	int ret;
19218c2ecf20Sopenharmony_ci
19228c2ecf20Sopenharmony_ci	zc->copybuf_len = 0;
19238c2ecf20Sopenharmony_ci
19248c2ecf20Sopenharmony_ci	if (address & (PAGE_SIZE - 1) || address != zc->address)
19258c2ecf20Sopenharmony_ci		return -EINVAL;
19268c2ecf20Sopenharmony_ci
19278c2ecf20Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN)
19288c2ecf20Sopenharmony_ci		return -ENOTCONN;
19298c2ecf20Sopenharmony_ci
19308c2ecf20Sopenharmony_ci	sock_rps_record_flow(sk);
19318c2ecf20Sopenharmony_ci
19328c2ecf20Sopenharmony_ci	mmap_read_lock(current->mm);
19338c2ecf20Sopenharmony_ci
19348c2ecf20Sopenharmony_ci	vma = find_vma(current->mm, address);
19358c2ecf20Sopenharmony_ci	if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) {
19368c2ecf20Sopenharmony_ci		mmap_read_unlock(current->mm);
19378c2ecf20Sopenharmony_ci		return -EINVAL;
19388c2ecf20Sopenharmony_ci	}
19398c2ecf20Sopenharmony_ci	vma_len = min_t(unsigned long, zc->length, vma->vm_end - address);
19408c2ecf20Sopenharmony_ci	avail_len = min_t(u32, vma_len, inq);
19418c2ecf20Sopenharmony_ci	aligned_len = avail_len & ~(PAGE_SIZE - 1);
19428c2ecf20Sopenharmony_ci	if (aligned_len) {
19438c2ecf20Sopenharmony_ci		zap_page_range(vma, address, aligned_len);
19448c2ecf20Sopenharmony_ci		zc->length = aligned_len;
19458c2ecf20Sopenharmony_ci		zc->recv_skip_hint = 0;
19468c2ecf20Sopenharmony_ci	} else {
19478c2ecf20Sopenharmony_ci		zc->length = avail_len;
19488c2ecf20Sopenharmony_ci		zc->recv_skip_hint = avail_len;
19498c2ecf20Sopenharmony_ci	}
19508c2ecf20Sopenharmony_ci	ret = 0;
19518c2ecf20Sopenharmony_ci	curr_addr = address;
19528c2ecf20Sopenharmony_ci	while (length + PAGE_SIZE <= zc->length) {
19538c2ecf20Sopenharmony_ci		int mappable_offset;
19548c2ecf20Sopenharmony_ci
19558c2ecf20Sopenharmony_ci		if (zc->recv_skip_hint < PAGE_SIZE) {
19568c2ecf20Sopenharmony_ci			u32 offset_frag;
19578c2ecf20Sopenharmony_ci
19588c2ecf20Sopenharmony_ci			/* If we're here, finish the current batch. */
19598c2ecf20Sopenharmony_ci			if (pg_idx) {
19608c2ecf20Sopenharmony_ci				ret = tcp_zerocopy_vm_insert_batch(vma, pages,
19618c2ecf20Sopenharmony_ci								   pg_idx,
19628c2ecf20Sopenharmony_ci								   &curr_addr,
19638c2ecf20Sopenharmony_ci								   &length,
19648c2ecf20Sopenharmony_ci								   &seq, zc);
19658c2ecf20Sopenharmony_ci				if (ret)
19668c2ecf20Sopenharmony_ci					goto out;
19678c2ecf20Sopenharmony_ci				pg_idx = 0;
19688c2ecf20Sopenharmony_ci			}
19698c2ecf20Sopenharmony_ci			if (skb) {
19708c2ecf20Sopenharmony_ci				if (zc->recv_skip_hint > 0)
19718c2ecf20Sopenharmony_ci					break;
19728c2ecf20Sopenharmony_ci				skb = skb->next;
19738c2ecf20Sopenharmony_ci				offset = seq - TCP_SKB_CB(skb)->seq;
19748c2ecf20Sopenharmony_ci			} else {
19758c2ecf20Sopenharmony_ci				skb = tcp_recv_skb(sk, seq, &offset);
19768c2ecf20Sopenharmony_ci			}
19778c2ecf20Sopenharmony_ci			zc->recv_skip_hint = skb->len - offset;
19788c2ecf20Sopenharmony_ci			frags = skb_advance_to_frag(skb, offset, &offset_frag);
19798c2ecf20Sopenharmony_ci			if (!frags || offset_frag)
19808c2ecf20Sopenharmony_ci				break;
19818c2ecf20Sopenharmony_ci		}
19828c2ecf20Sopenharmony_ci
19838c2ecf20Sopenharmony_ci		mappable_offset = find_next_mappable_frag(frags,
19848c2ecf20Sopenharmony_ci							  zc->recv_skip_hint);
19858c2ecf20Sopenharmony_ci		if (mappable_offset) {
19868c2ecf20Sopenharmony_ci			zc->recv_skip_hint = mappable_offset;
19878c2ecf20Sopenharmony_ci			break;
19888c2ecf20Sopenharmony_ci		}
19898c2ecf20Sopenharmony_ci		pages[pg_idx] = skb_frag_page(frags);
19908c2ecf20Sopenharmony_ci		pg_idx++;
19918c2ecf20Sopenharmony_ci		length += PAGE_SIZE;
19928c2ecf20Sopenharmony_ci		zc->recv_skip_hint -= PAGE_SIZE;
19938c2ecf20Sopenharmony_ci		frags++;
19948c2ecf20Sopenharmony_ci		if (pg_idx == PAGE_BATCH_SIZE) {
19958c2ecf20Sopenharmony_ci			ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
19968c2ecf20Sopenharmony_ci							   &curr_addr, &length,
19978c2ecf20Sopenharmony_ci							   &seq, zc);
19988c2ecf20Sopenharmony_ci			if (ret)
19998c2ecf20Sopenharmony_ci				goto out;
20008c2ecf20Sopenharmony_ci			pg_idx = 0;
20018c2ecf20Sopenharmony_ci		}
20028c2ecf20Sopenharmony_ci	}
20038c2ecf20Sopenharmony_ci	if (pg_idx) {
20048c2ecf20Sopenharmony_ci		ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
20058c2ecf20Sopenharmony_ci						   &curr_addr, &length, &seq,
20068c2ecf20Sopenharmony_ci						   zc);
20078c2ecf20Sopenharmony_ci	}
20088c2ecf20Sopenharmony_ciout:
20098c2ecf20Sopenharmony_ci	mmap_read_unlock(current->mm);
20108c2ecf20Sopenharmony_ci	/* Try to copy straggler data. */
20118c2ecf20Sopenharmony_ci	if (!ret)
20128c2ecf20Sopenharmony_ci		copylen = tcp_zerocopy_handle_leftover_data(zc, sk, skb, &seq,
20138c2ecf20Sopenharmony_ci							    copybuf_len);
20148c2ecf20Sopenharmony_ci
20158c2ecf20Sopenharmony_ci	if (length + copylen) {
20168c2ecf20Sopenharmony_ci		WRITE_ONCE(tp->copied_seq, seq);
20178c2ecf20Sopenharmony_ci		tcp_rcv_space_adjust(sk);
20188c2ecf20Sopenharmony_ci
20198c2ecf20Sopenharmony_ci		/* Clean up data we have read: This will do ACK frames. */
20208c2ecf20Sopenharmony_ci		tcp_recv_skb(sk, seq, &offset);
20218c2ecf20Sopenharmony_ci		tcp_cleanup_rbuf(sk, length + copylen);
20228c2ecf20Sopenharmony_ci		ret = 0;
20238c2ecf20Sopenharmony_ci		if (length == zc->length)
20248c2ecf20Sopenharmony_ci			zc->recv_skip_hint = 0;
20258c2ecf20Sopenharmony_ci	} else {
20268c2ecf20Sopenharmony_ci		if (!zc->recv_skip_hint && sock_flag(sk, SOCK_DONE))
20278c2ecf20Sopenharmony_ci			ret = -EIO;
20288c2ecf20Sopenharmony_ci	}
20298c2ecf20Sopenharmony_ci	zc->length = length;
20308c2ecf20Sopenharmony_ci	return ret;
20318c2ecf20Sopenharmony_ci}
20328c2ecf20Sopenharmony_ci#endif
20338c2ecf20Sopenharmony_ci
20348c2ecf20Sopenharmony_cistatic void tcp_update_recv_tstamps(struct sk_buff *skb,
20358c2ecf20Sopenharmony_ci				    struct scm_timestamping_internal *tss)
20368c2ecf20Sopenharmony_ci{
20378c2ecf20Sopenharmony_ci	if (skb->tstamp)
20388c2ecf20Sopenharmony_ci		tss->ts[0] = ktime_to_timespec64(skb->tstamp);
20398c2ecf20Sopenharmony_ci	else
20408c2ecf20Sopenharmony_ci		tss->ts[0] = (struct timespec64) {0};
20418c2ecf20Sopenharmony_ci
20428c2ecf20Sopenharmony_ci	if (skb_hwtstamps(skb)->hwtstamp)
20438c2ecf20Sopenharmony_ci		tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp);
20448c2ecf20Sopenharmony_ci	else
20458c2ecf20Sopenharmony_ci		tss->ts[2] = (struct timespec64) {0};
20468c2ecf20Sopenharmony_ci}
20478c2ecf20Sopenharmony_ci
20488c2ecf20Sopenharmony_ci/* Similar to __sock_recv_timestamp, but does not require an skb */
20498c2ecf20Sopenharmony_cistatic void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
20508c2ecf20Sopenharmony_ci			       struct scm_timestamping_internal *tss)
20518c2ecf20Sopenharmony_ci{
20528c2ecf20Sopenharmony_ci	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
20538c2ecf20Sopenharmony_ci	bool has_timestamping = false;
20548c2ecf20Sopenharmony_ci
20558c2ecf20Sopenharmony_ci	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
20568c2ecf20Sopenharmony_ci		if (sock_flag(sk, SOCK_RCVTSTAMP)) {
20578c2ecf20Sopenharmony_ci			if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
20588c2ecf20Sopenharmony_ci				if (new_tstamp) {
20598c2ecf20Sopenharmony_ci					struct __kernel_timespec kts = {
20608c2ecf20Sopenharmony_ci						.tv_sec = tss->ts[0].tv_sec,
20618c2ecf20Sopenharmony_ci						.tv_nsec = tss->ts[0].tv_nsec,
20628c2ecf20Sopenharmony_ci					};
20638c2ecf20Sopenharmony_ci					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
20648c2ecf20Sopenharmony_ci						 sizeof(kts), &kts);
20658c2ecf20Sopenharmony_ci				} else {
20668c2ecf20Sopenharmony_ci					struct __kernel_old_timespec ts_old = {
20678c2ecf20Sopenharmony_ci						.tv_sec = tss->ts[0].tv_sec,
20688c2ecf20Sopenharmony_ci						.tv_nsec = tss->ts[0].tv_nsec,
20698c2ecf20Sopenharmony_ci					};
20708c2ecf20Sopenharmony_ci					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
20718c2ecf20Sopenharmony_ci						 sizeof(ts_old), &ts_old);
20728c2ecf20Sopenharmony_ci				}
20738c2ecf20Sopenharmony_ci			} else {
20748c2ecf20Sopenharmony_ci				if (new_tstamp) {
20758c2ecf20Sopenharmony_ci					struct __kernel_sock_timeval stv = {
20768c2ecf20Sopenharmony_ci						.tv_sec = tss->ts[0].tv_sec,
20778c2ecf20Sopenharmony_ci						.tv_usec = tss->ts[0].tv_nsec / 1000,
20788c2ecf20Sopenharmony_ci					};
20798c2ecf20Sopenharmony_ci					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
20808c2ecf20Sopenharmony_ci						 sizeof(stv), &stv);
20818c2ecf20Sopenharmony_ci				} else {
20828c2ecf20Sopenharmony_ci					struct __kernel_old_timeval tv = {
20838c2ecf20Sopenharmony_ci						.tv_sec = tss->ts[0].tv_sec,
20848c2ecf20Sopenharmony_ci						.tv_usec = tss->ts[0].tv_nsec / 1000,
20858c2ecf20Sopenharmony_ci					};
20868c2ecf20Sopenharmony_ci					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
20878c2ecf20Sopenharmony_ci						 sizeof(tv), &tv);
20888c2ecf20Sopenharmony_ci				}
20898c2ecf20Sopenharmony_ci			}
20908c2ecf20Sopenharmony_ci		}
20918c2ecf20Sopenharmony_ci
20928c2ecf20Sopenharmony_ci		if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
20938c2ecf20Sopenharmony_ci			has_timestamping = true;
20948c2ecf20Sopenharmony_ci		else
20958c2ecf20Sopenharmony_ci			tss->ts[0] = (struct timespec64) {0};
20968c2ecf20Sopenharmony_ci	}
20978c2ecf20Sopenharmony_ci
20988c2ecf20Sopenharmony_ci	if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
20998c2ecf20Sopenharmony_ci		if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
21008c2ecf20Sopenharmony_ci			has_timestamping = true;
21018c2ecf20Sopenharmony_ci		else
21028c2ecf20Sopenharmony_ci			tss->ts[2] = (struct timespec64) {0};
21038c2ecf20Sopenharmony_ci	}
21048c2ecf20Sopenharmony_ci
21058c2ecf20Sopenharmony_ci	if (has_timestamping) {
21068c2ecf20Sopenharmony_ci		tss->ts[1] = (struct timespec64) {0};
21078c2ecf20Sopenharmony_ci		if (sock_flag(sk, SOCK_TSTAMP_NEW))
21088c2ecf20Sopenharmony_ci			put_cmsg_scm_timestamping64(msg, tss);
21098c2ecf20Sopenharmony_ci		else
21108c2ecf20Sopenharmony_ci			put_cmsg_scm_timestamping(msg, tss);
21118c2ecf20Sopenharmony_ci	}
21128c2ecf20Sopenharmony_ci}
21138c2ecf20Sopenharmony_ci
21148c2ecf20Sopenharmony_cistatic int tcp_inq_hint(struct sock *sk)
21158c2ecf20Sopenharmony_ci{
21168c2ecf20Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
21178c2ecf20Sopenharmony_ci	u32 copied_seq = READ_ONCE(tp->copied_seq);
21188c2ecf20Sopenharmony_ci	u32 rcv_nxt = READ_ONCE(tp->rcv_nxt);
21198c2ecf20Sopenharmony_ci	int inq;
21208c2ecf20Sopenharmony_ci
21218c2ecf20Sopenharmony_ci	inq = rcv_nxt - copied_seq;
21228c2ecf20Sopenharmony_ci	if (unlikely(inq < 0 || copied_seq != READ_ONCE(tp->copied_seq))) {
21238c2ecf20Sopenharmony_ci		lock_sock(sk);
21248c2ecf20Sopenharmony_ci		inq = tp->rcv_nxt - tp->copied_seq;
21258c2ecf20Sopenharmony_ci		release_sock(sk);
21268c2ecf20Sopenharmony_ci	}
21278c2ecf20Sopenharmony_ci	/* After receiving a FIN, tell the user-space to continue reading
21288c2ecf20Sopenharmony_ci	 * by returning a non-zero inq.
21298c2ecf20Sopenharmony_ci	 */
21308c2ecf20Sopenharmony_ci	if (inq == 0 && sock_flag(sk, SOCK_DONE))
21318c2ecf20Sopenharmony_ci		inq = 1;
21328c2ecf20Sopenharmony_ci	return inq;
21338c2ecf20Sopenharmony_ci}
21348c2ecf20Sopenharmony_ci
21358c2ecf20Sopenharmony_ci/*
21368c2ecf20Sopenharmony_ci *	This routine copies from a sock struct into the user buffer.
21378c2ecf20Sopenharmony_ci *
21388c2ecf20Sopenharmony_ci *	Technical note: in 2.3 we work on _locked_ socket, so that
21398c2ecf20Sopenharmony_ci *	tricks with *seq access order and skb->users are not required.
21408c2ecf20Sopenharmony_ci *	Probably, code can be easily improved even more.
21418c2ecf20Sopenharmony_ci */
21428c2ecf20Sopenharmony_ci
21438c2ecf20Sopenharmony_ciint tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
21448c2ecf20Sopenharmony_ci		int flags, int *addr_len)
21458c2ecf20Sopenharmony_ci{
21468c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
21478c2ecf20Sopenharmony_ci	int copied = 0;
21488c2ecf20Sopenharmony_ci	u32 peek_seq;
21498c2ecf20Sopenharmony_ci	u32 *seq;
21508c2ecf20Sopenharmony_ci	unsigned long used;
21518c2ecf20Sopenharmony_ci	int err, inq;
21528c2ecf20Sopenharmony_ci	int target;		/* Read at least this many bytes */
21538c2ecf20Sopenharmony_ci	long timeo;
21548c2ecf20Sopenharmony_ci	struct sk_buff *skb, *last;
21558c2ecf20Sopenharmony_ci	u32 urg_hole = 0;
21568c2ecf20Sopenharmony_ci	struct scm_timestamping_internal tss;
21578c2ecf20Sopenharmony_ci	int cmsg_flags;
21588c2ecf20Sopenharmony_ci
21598c2ecf20Sopenharmony_ci	if (unlikely(flags & MSG_ERRQUEUE))
21608c2ecf20Sopenharmony_ci		return inet_recv_error(sk, msg, len, addr_len);
21618c2ecf20Sopenharmony_ci
21628c2ecf20Sopenharmony_ci	if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) &&
21638c2ecf20Sopenharmony_ci	    (sk->sk_state == TCP_ESTABLISHED))
21648c2ecf20Sopenharmony_ci		sk_busy_loop(sk, nonblock);
21658c2ecf20Sopenharmony_ci
21668c2ecf20Sopenharmony_ci	lock_sock(sk);
21678c2ecf20Sopenharmony_ci
21688c2ecf20Sopenharmony_ci	err = -ENOTCONN;
21698c2ecf20Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN)
21708c2ecf20Sopenharmony_ci		goto out;
21718c2ecf20Sopenharmony_ci
21728c2ecf20Sopenharmony_ci	cmsg_flags = tp->recvmsg_inq ? 1 : 0;
21738c2ecf20Sopenharmony_ci	timeo = sock_rcvtimeo(sk, nonblock);
21748c2ecf20Sopenharmony_ci
21758c2ecf20Sopenharmony_ci	/* Urgent data needs to be handled specially. */
21768c2ecf20Sopenharmony_ci	if (flags & MSG_OOB)
21778c2ecf20Sopenharmony_ci		goto recv_urg;
21788c2ecf20Sopenharmony_ci
21798c2ecf20Sopenharmony_ci	if (unlikely(tp->repair)) {
21808c2ecf20Sopenharmony_ci		err = -EPERM;
21818c2ecf20Sopenharmony_ci		if (!(flags & MSG_PEEK))
21828c2ecf20Sopenharmony_ci			goto out;
21838c2ecf20Sopenharmony_ci
21848c2ecf20Sopenharmony_ci		if (tp->repair_queue == TCP_SEND_QUEUE)
21858c2ecf20Sopenharmony_ci			goto recv_sndq;
21868c2ecf20Sopenharmony_ci
21878c2ecf20Sopenharmony_ci		err = -EINVAL;
21888c2ecf20Sopenharmony_ci		if (tp->repair_queue == TCP_NO_QUEUE)
21898c2ecf20Sopenharmony_ci			goto out;
21908c2ecf20Sopenharmony_ci
21918c2ecf20Sopenharmony_ci		/* 'common' recv queue MSG_PEEK-ing */
21928c2ecf20Sopenharmony_ci	}
21938c2ecf20Sopenharmony_ci
21948c2ecf20Sopenharmony_ci	seq = &tp->copied_seq;
21958c2ecf20Sopenharmony_ci	if (flags & MSG_PEEK) {
21968c2ecf20Sopenharmony_ci		peek_seq = tp->copied_seq;
21978c2ecf20Sopenharmony_ci		seq = &peek_seq;
21988c2ecf20Sopenharmony_ci	}
21998c2ecf20Sopenharmony_ci
22008c2ecf20Sopenharmony_ci	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
22018c2ecf20Sopenharmony_ci
22028c2ecf20Sopenharmony_ci	do {
22038c2ecf20Sopenharmony_ci		u32 offset;
22048c2ecf20Sopenharmony_ci
22058c2ecf20Sopenharmony_ci		/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
22068c2ecf20Sopenharmony_ci		if (tp->urg_data && tp->urg_seq == *seq) {
22078c2ecf20Sopenharmony_ci			if (copied)
22088c2ecf20Sopenharmony_ci				break;
22098c2ecf20Sopenharmony_ci			if (signal_pending(current)) {
22108c2ecf20Sopenharmony_ci				copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
22118c2ecf20Sopenharmony_ci				break;
22128c2ecf20Sopenharmony_ci			}
22138c2ecf20Sopenharmony_ci		}
22148c2ecf20Sopenharmony_ci
22158c2ecf20Sopenharmony_ci		/* Next get a buffer. */
22168c2ecf20Sopenharmony_ci
22178c2ecf20Sopenharmony_ci		last = skb_peek_tail(&sk->sk_receive_queue);
22188c2ecf20Sopenharmony_ci		skb_queue_walk(&sk->sk_receive_queue, skb) {
22198c2ecf20Sopenharmony_ci			last = skb;
22208c2ecf20Sopenharmony_ci			/* Now that we have two receive queues this
22218c2ecf20Sopenharmony_ci			 * shouldn't happen.
22228c2ecf20Sopenharmony_ci			 */
22238c2ecf20Sopenharmony_ci			if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
22248c2ecf20Sopenharmony_ci				 "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n",
22258c2ecf20Sopenharmony_ci				 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
22268c2ecf20Sopenharmony_ci				 flags))
22278c2ecf20Sopenharmony_ci				break;
22288c2ecf20Sopenharmony_ci
22298c2ecf20Sopenharmony_ci			offset = *seq - TCP_SKB_CB(skb)->seq;
22308c2ecf20Sopenharmony_ci			if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
22318c2ecf20Sopenharmony_ci				pr_err_once("%s: found a SYN, please report !\n", __func__);
22328c2ecf20Sopenharmony_ci				offset--;
22338c2ecf20Sopenharmony_ci			}
22348c2ecf20Sopenharmony_ci			if (offset < skb->len)
22358c2ecf20Sopenharmony_ci				goto found_ok_skb;
22368c2ecf20Sopenharmony_ci			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
22378c2ecf20Sopenharmony_ci				goto found_fin_ok;
22388c2ecf20Sopenharmony_ci			WARN(!(flags & MSG_PEEK),
22398c2ecf20Sopenharmony_ci			     "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n",
22408c2ecf20Sopenharmony_ci			     *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
22418c2ecf20Sopenharmony_ci		}
22428c2ecf20Sopenharmony_ci
22438c2ecf20Sopenharmony_ci		/* Well, if we have backlog, try to process it now yet. */
22448c2ecf20Sopenharmony_ci
22458c2ecf20Sopenharmony_ci		if (copied >= target && !READ_ONCE(sk->sk_backlog.tail))
22468c2ecf20Sopenharmony_ci			break;
22478c2ecf20Sopenharmony_ci
22488c2ecf20Sopenharmony_ci		if (copied) {
22498c2ecf20Sopenharmony_ci			if (sk->sk_err ||
22508c2ecf20Sopenharmony_ci			    sk->sk_state == TCP_CLOSE ||
22518c2ecf20Sopenharmony_ci			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
22528c2ecf20Sopenharmony_ci			    !timeo ||
22538c2ecf20Sopenharmony_ci			    signal_pending(current))
22548c2ecf20Sopenharmony_ci				break;
22558c2ecf20Sopenharmony_ci		} else {
22568c2ecf20Sopenharmony_ci			if (sock_flag(sk, SOCK_DONE))
22578c2ecf20Sopenharmony_ci				break;
22588c2ecf20Sopenharmony_ci
22598c2ecf20Sopenharmony_ci			if (sk->sk_err) {
22608c2ecf20Sopenharmony_ci				copied = sock_error(sk);
22618c2ecf20Sopenharmony_ci				break;
22628c2ecf20Sopenharmony_ci			}
22638c2ecf20Sopenharmony_ci
22648c2ecf20Sopenharmony_ci			if (sk->sk_shutdown & RCV_SHUTDOWN)
22658c2ecf20Sopenharmony_ci				break;
22668c2ecf20Sopenharmony_ci
22678c2ecf20Sopenharmony_ci			if (sk->sk_state == TCP_CLOSE) {
22688c2ecf20Sopenharmony_ci				/* This occurs when user tries to read
22698c2ecf20Sopenharmony_ci				 * from never connected socket.
22708c2ecf20Sopenharmony_ci				 */
22718c2ecf20Sopenharmony_ci				copied = -ENOTCONN;
22728c2ecf20Sopenharmony_ci				break;
22738c2ecf20Sopenharmony_ci			}
22748c2ecf20Sopenharmony_ci
22758c2ecf20Sopenharmony_ci			if (!timeo) {
22768c2ecf20Sopenharmony_ci				copied = -EAGAIN;
22778c2ecf20Sopenharmony_ci				break;
22788c2ecf20Sopenharmony_ci			}
22798c2ecf20Sopenharmony_ci
22808c2ecf20Sopenharmony_ci			if (signal_pending(current)) {
22818c2ecf20Sopenharmony_ci				copied = sock_intr_errno(timeo);
22828c2ecf20Sopenharmony_ci				break;
22838c2ecf20Sopenharmony_ci			}
22848c2ecf20Sopenharmony_ci		}
22858c2ecf20Sopenharmony_ci
22868c2ecf20Sopenharmony_ci		tcp_cleanup_rbuf(sk, copied);
22878c2ecf20Sopenharmony_ci
22888c2ecf20Sopenharmony_ci		if (copied >= target) {
22898c2ecf20Sopenharmony_ci			/* Do not sleep, just process backlog. */
22908c2ecf20Sopenharmony_ci			release_sock(sk);
22918c2ecf20Sopenharmony_ci			lock_sock(sk);
22928c2ecf20Sopenharmony_ci		} else {
22938c2ecf20Sopenharmony_ci			sk_wait_data(sk, &timeo, last);
22948c2ecf20Sopenharmony_ci		}
22958c2ecf20Sopenharmony_ci
22968c2ecf20Sopenharmony_ci		if ((flags & MSG_PEEK) &&
22978c2ecf20Sopenharmony_ci		    (peek_seq - copied - urg_hole != tp->copied_seq)) {
22988c2ecf20Sopenharmony_ci			net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
22998c2ecf20Sopenharmony_ci					    current->comm,
23008c2ecf20Sopenharmony_ci					    task_pid_nr(current));
23018c2ecf20Sopenharmony_ci			peek_seq = tp->copied_seq;
23028c2ecf20Sopenharmony_ci		}
23038c2ecf20Sopenharmony_ci		continue;
23048c2ecf20Sopenharmony_ci
23058c2ecf20Sopenharmony_cifound_ok_skb:
23068c2ecf20Sopenharmony_ci		/* Ok so how much can we use? */
23078c2ecf20Sopenharmony_ci		used = skb->len - offset;
23088c2ecf20Sopenharmony_ci		if (len < used)
23098c2ecf20Sopenharmony_ci			used = len;
23108c2ecf20Sopenharmony_ci
23118c2ecf20Sopenharmony_ci		/* Do we have urgent data here? */
23128c2ecf20Sopenharmony_ci		if (tp->urg_data) {
23138c2ecf20Sopenharmony_ci			u32 urg_offset = tp->urg_seq - *seq;
23148c2ecf20Sopenharmony_ci			if (urg_offset < used) {
23158c2ecf20Sopenharmony_ci				if (!urg_offset) {
23168c2ecf20Sopenharmony_ci					if (!sock_flag(sk, SOCK_URGINLINE)) {
23178c2ecf20Sopenharmony_ci						WRITE_ONCE(*seq, *seq + 1);
23188c2ecf20Sopenharmony_ci						urg_hole++;
23198c2ecf20Sopenharmony_ci						offset++;
23208c2ecf20Sopenharmony_ci						used--;
23218c2ecf20Sopenharmony_ci						if (!used)
23228c2ecf20Sopenharmony_ci							goto skip_copy;
23238c2ecf20Sopenharmony_ci					}
23248c2ecf20Sopenharmony_ci				} else
23258c2ecf20Sopenharmony_ci					used = urg_offset;
23268c2ecf20Sopenharmony_ci			}
23278c2ecf20Sopenharmony_ci		}
23288c2ecf20Sopenharmony_ci
23298c2ecf20Sopenharmony_ci		if (!(flags & MSG_TRUNC)) {
23308c2ecf20Sopenharmony_ci			err = skb_copy_datagram_msg(skb, offset, msg, used);
23318c2ecf20Sopenharmony_ci			if (err) {
23328c2ecf20Sopenharmony_ci				/* Exception. Bailout! */
23338c2ecf20Sopenharmony_ci				if (!copied)
23348c2ecf20Sopenharmony_ci					copied = -EFAULT;
23358c2ecf20Sopenharmony_ci				break;
23368c2ecf20Sopenharmony_ci			}
23378c2ecf20Sopenharmony_ci		}
23388c2ecf20Sopenharmony_ci
23398c2ecf20Sopenharmony_ci		WRITE_ONCE(*seq, *seq + used);
23408c2ecf20Sopenharmony_ci		copied += used;
23418c2ecf20Sopenharmony_ci		len -= used;
23428c2ecf20Sopenharmony_ci
23438c2ecf20Sopenharmony_ci		tcp_rcv_space_adjust(sk);
23448c2ecf20Sopenharmony_ci
23458c2ecf20Sopenharmony_ciskip_copy:
23468c2ecf20Sopenharmony_ci		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
23478c2ecf20Sopenharmony_ci			tp->urg_data = 0;
23488c2ecf20Sopenharmony_ci			tcp_fast_path_check(sk);
23498c2ecf20Sopenharmony_ci		}
23508c2ecf20Sopenharmony_ci
23518c2ecf20Sopenharmony_ci		if (TCP_SKB_CB(skb)->has_rxtstamp) {
23528c2ecf20Sopenharmony_ci			tcp_update_recv_tstamps(skb, &tss);
23538c2ecf20Sopenharmony_ci			cmsg_flags |= 2;
23548c2ecf20Sopenharmony_ci		}
23558c2ecf20Sopenharmony_ci
23568c2ecf20Sopenharmony_ci		if (used + offset < skb->len)
23578c2ecf20Sopenharmony_ci			continue;
23588c2ecf20Sopenharmony_ci
23598c2ecf20Sopenharmony_ci		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
23608c2ecf20Sopenharmony_ci			goto found_fin_ok;
23618c2ecf20Sopenharmony_ci		if (!(flags & MSG_PEEK))
23628c2ecf20Sopenharmony_ci			sk_eat_skb(sk, skb);
23638c2ecf20Sopenharmony_ci		continue;
23648c2ecf20Sopenharmony_ci
23658c2ecf20Sopenharmony_cifound_fin_ok:
23668c2ecf20Sopenharmony_ci		/* Process the FIN. */
23678c2ecf20Sopenharmony_ci		WRITE_ONCE(*seq, *seq + 1);
23688c2ecf20Sopenharmony_ci		if (!(flags & MSG_PEEK))
23698c2ecf20Sopenharmony_ci			sk_eat_skb(sk, skb);
23708c2ecf20Sopenharmony_ci		break;
23718c2ecf20Sopenharmony_ci	} while (len > 0);
23728c2ecf20Sopenharmony_ci
23738c2ecf20Sopenharmony_ci	/* According to UNIX98, msg_name/msg_namelen are ignored
23748c2ecf20Sopenharmony_ci	 * on connected socket. I was just happy when found this 8) --ANK
23758c2ecf20Sopenharmony_ci	 */
23768c2ecf20Sopenharmony_ci
23778c2ecf20Sopenharmony_ci	/* Clean up data we have read: This will do ACK frames. */
23788c2ecf20Sopenharmony_ci	tcp_cleanup_rbuf(sk, copied);
23798c2ecf20Sopenharmony_ci
23808c2ecf20Sopenharmony_ci	release_sock(sk);
23818c2ecf20Sopenharmony_ci
23828c2ecf20Sopenharmony_ci	if (cmsg_flags) {
23838c2ecf20Sopenharmony_ci		if (cmsg_flags & 2)
23848c2ecf20Sopenharmony_ci			tcp_recv_timestamp(msg, sk, &tss);
23858c2ecf20Sopenharmony_ci		if (cmsg_flags & 1) {
23868c2ecf20Sopenharmony_ci			inq = tcp_inq_hint(sk);
23878c2ecf20Sopenharmony_ci			put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
23888c2ecf20Sopenharmony_ci		}
23898c2ecf20Sopenharmony_ci	}
23908c2ecf20Sopenharmony_ci
23918c2ecf20Sopenharmony_ci	return copied;
23928c2ecf20Sopenharmony_ci
23938c2ecf20Sopenharmony_ciout:
23948c2ecf20Sopenharmony_ci	release_sock(sk);
23958c2ecf20Sopenharmony_ci	return err;
23968c2ecf20Sopenharmony_ci
23978c2ecf20Sopenharmony_cirecv_urg:
23988c2ecf20Sopenharmony_ci	err = tcp_recv_urg(sk, msg, len, flags);
23998c2ecf20Sopenharmony_ci	goto out;
24008c2ecf20Sopenharmony_ci
24018c2ecf20Sopenharmony_cirecv_sndq:
24028c2ecf20Sopenharmony_ci	err = tcp_peek_sndq(sk, msg, len);
24038c2ecf20Sopenharmony_ci	goto out;
24048c2ecf20Sopenharmony_ci}
24058c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_recvmsg);
24068c2ecf20Sopenharmony_ci
24078c2ecf20Sopenharmony_civoid tcp_set_state(struct sock *sk, int state)
24088c2ecf20Sopenharmony_ci{
24098c2ecf20Sopenharmony_ci	int oldstate = sk->sk_state;
24108c2ecf20Sopenharmony_ci
24118c2ecf20Sopenharmony_ci	/* We defined a new enum for TCP states that are exported in BPF
24128c2ecf20Sopenharmony_ci	 * so as not force the internal TCP states to be frozen. The
24138c2ecf20Sopenharmony_ci	 * following checks will detect if an internal state value ever
24148c2ecf20Sopenharmony_ci	 * differs from the BPF value. If this ever happens, then we will
24158c2ecf20Sopenharmony_ci	 * need to remap the internal value to the BPF value before calling
24168c2ecf20Sopenharmony_ci	 * tcp_call_bpf_2arg.
24178c2ecf20Sopenharmony_ci	 */
24188c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
24198c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);
24208c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV);
24218c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1);
24228c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2);
24238c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT);
24248c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE);
24258c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT);
24268c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK);
24278c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
24288c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
24298c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
24308c2ecf20Sopenharmony_ci	BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
24318c2ecf20Sopenharmony_ci
24328c2ecf20Sopenharmony_ci	if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
24338c2ecf20Sopenharmony_ci		tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
24348c2ecf20Sopenharmony_ci
24358c2ecf20Sopenharmony_ci	switch (state) {
24368c2ecf20Sopenharmony_ci	case TCP_ESTABLISHED:
24378c2ecf20Sopenharmony_ci		if (oldstate != TCP_ESTABLISHED)
24388c2ecf20Sopenharmony_ci			TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
24398c2ecf20Sopenharmony_ci		break;
24408c2ecf20Sopenharmony_ci
24418c2ecf20Sopenharmony_ci	case TCP_CLOSE:
24428c2ecf20Sopenharmony_ci		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
24438c2ecf20Sopenharmony_ci			TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
24448c2ecf20Sopenharmony_ci
24458c2ecf20Sopenharmony_ci		sk->sk_prot->unhash(sk);
24468c2ecf20Sopenharmony_ci		if (inet_csk(sk)->icsk_bind_hash &&
24478c2ecf20Sopenharmony_ci		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
24488c2ecf20Sopenharmony_ci			inet_put_port(sk);
24498c2ecf20Sopenharmony_ci		fallthrough;
24508c2ecf20Sopenharmony_ci	default:
24518c2ecf20Sopenharmony_ci		if (oldstate == TCP_ESTABLISHED)
24528c2ecf20Sopenharmony_ci			TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
24538c2ecf20Sopenharmony_ci	}
24548c2ecf20Sopenharmony_ci
24558c2ecf20Sopenharmony_ci	/* Change state AFTER socket is unhashed to avoid closed
24568c2ecf20Sopenharmony_ci	 * socket sitting in hash tables.
24578c2ecf20Sopenharmony_ci	 */
24588c2ecf20Sopenharmony_ci	inet_sk_state_store(sk, state);
24598c2ecf20Sopenharmony_ci}
24608c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_set_state);
24618c2ecf20Sopenharmony_ci
24628c2ecf20Sopenharmony_ci/*
24638c2ecf20Sopenharmony_ci *	State processing on a close. This implements the state shift for
24648c2ecf20Sopenharmony_ci *	sending our FIN frame. Note that we only send a FIN for some
24658c2ecf20Sopenharmony_ci *	states. A shutdown() may have already sent the FIN, or we may be
24668c2ecf20Sopenharmony_ci *	closed.
24678c2ecf20Sopenharmony_ci */
24688c2ecf20Sopenharmony_ci
24698c2ecf20Sopenharmony_cistatic const unsigned char new_state[16] = {
24708c2ecf20Sopenharmony_ci  /* current state:        new state:      action:	*/
24718c2ecf20Sopenharmony_ci  [0 /* (Invalid) */]	= TCP_CLOSE,
24728c2ecf20Sopenharmony_ci  [TCP_ESTABLISHED]	= TCP_FIN_WAIT1 | TCP_ACTION_FIN,
24738c2ecf20Sopenharmony_ci  [TCP_SYN_SENT]	= TCP_CLOSE,
24748c2ecf20Sopenharmony_ci  [TCP_SYN_RECV]	= TCP_FIN_WAIT1 | TCP_ACTION_FIN,
24758c2ecf20Sopenharmony_ci  [TCP_FIN_WAIT1]	= TCP_FIN_WAIT1,
24768c2ecf20Sopenharmony_ci  [TCP_FIN_WAIT2]	= TCP_FIN_WAIT2,
24778c2ecf20Sopenharmony_ci  [TCP_TIME_WAIT]	= TCP_CLOSE,
24788c2ecf20Sopenharmony_ci  [TCP_CLOSE]		= TCP_CLOSE,
24798c2ecf20Sopenharmony_ci  [TCP_CLOSE_WAIT]	= TCP_LAST_ACK  | TCP_ACTION_FIN,
24808c2ecf20Sopenharmony_ci  [TCP_LAST_ACK]	= TCP_LAST_ACK,
24818c2ecf20Sopenharmony_ci  [TCP_LISTEN]		= TCP_CLOSE,
24828c2ecf20Sopenharmony_ci  [TCP_CLOSING]		= TCP_CLOSING,
24838c2ecf20Sopenharmony_ci  [TCP_NEW_SYN_RECV]	= TCP_CLOSE,	/* should not happen ! */
24848c2ecf20Sopenharmony_ci};
24858c2ecf20Sopenharmony_ci
24868c2ecf20Sopenharmony_cistatic int tcp_close_state(struct sock *sk)
24878c2ecf20Sopenharmony_ci{
24888c2ecf20Sopenharmony_ci	int next = (int)new_state[sk->sk_state];
24898c2ecf20Sopenharmony_ci	int ns = next & TCP_STATE_MASK;
24908c2ecf20Sopenharmony_ci
24918c2ecf20Sopenharmony_ci	tcp_set_state(sk, ns);
24928c2ecf20Sopenharmony_ci
24938c2ecf20Sopenharmony_ci	return next & TCP_ACTION_FIN;
24948c2ecf20Sopenharmony_ci}
24958c2ecf20Sopenharmony_ci
24968c2ecf20Sopenharmony_ci/*
24978c2ecf20Sopenharmony_ci *	Shutdown the sending side of a connection. Much like close except
24988c2ecf20Sopenharmony_ci *	that we don't receive shut down or sock_set_flag(sk, SOCK_DEAD).
24998c2ecf20Sopenharmony_ci */
25008c2ecf20Sopenharmony_ci
25018c2ecf20Sopenharmony_civoid tcp_shutdown(struct sock *sk, int how)
25028c2ecf20Sopenharmony_ci{
25038c2ecf20Sopenharmony_ci	/*	We need to grab some memory, and put together a FIN,
25048c2ecf20Sopenharmony_ci	 *	and then put it into the queue to be sent.
25058c2ecf20Sopenharmony_ci	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
25068c2ecf20Sopenharmony_ci	 */
25078c2ecf20Sopenharmony_ci	if (!(how & SEND_SHUTDOWN))
25088c2ecf20Sopenharmony_ci		return;
25098c2ecf20Sopenharmony_ci
25108c2ecf20Sopenharmony_ci	/* If we've already sent a FIN, or it's a closed state, skip this. */
25118c2ecf20Sopenharmony_ci	if ((1 << sk->sk_state) &
25128c2ecf20Sopenharmony_ci	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
25138c2ecf20Sopenharmony_ci	     TCPF_CLOSE_WAIT)) {
25148c2ecf20Sopenharmony_ci		/* Clear out any half completed packets.  FIN if needed. */
25158c2ecf20Sopenharmony_ci		if (tcp_close_state(sk))
25168c2ecf20Sopenharmony_ci			tcp_send_fin(sk);
25178c2ecf20Sopenharmony_ci	}
25188c2ecf20Sopenharmony_ci}
25198c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_shutdown);
25208c2ecf20Sopenharmony_ci
25218c2ecf20Sopenharmony_ciint tcp_orphan_count_sum(void)
25228c2ecf20Sopenharmony_ci{
25238c2ecf20Sopenharmony_ci	int i, total = 0;
25248c2ecf20Sopenharmony_ci
25258c2ecf20Sopenharmony_ci	for_each_possible_cpu(i)
25268c2ecf20Sopenharmony_ci		total += per_cpu(tcp_orphan_count, i);
25278c2ecf20Sopenharmony_ci
25288c2ecf20Sopenharmony_ci	return max(total, 0);
25298c2ecf20Sopenharmony_ci}
25308c2ecf20Sopenharmony_ci
/* Cached orphan total: written by tcp_orphan_update() and read by
 * tcp_too_many_orphans().
 */
static int tcp_orphan_cache;
/* Timer re-armed by tcp_orphan_update() each time it runs.
 * NOTE(review): its initial setup is not in this part of the file.
 */
static struct timer_list tcp_orphan_timer;
/* Interval between two refreshes of tcp_orphan_cache: 100 ms in jiffies. */
#define TCP_ORPHAN_TIMER_PERIOD msecs_to_jiffies(100)
25348c2ecf20Sopenharmony_ci
25358c2ecf20Sopenharmony_cistatic void tcp_orphan_update(struct timer_list *unused)
25368c2ecf20Sopenharmony_ci{
25378c2ecf20Sopenharmony_ci	WRITE_ONCE(tcp_orphan_cache, tcp_orphan_count_sum());
25388c2ecf20Sopenharmony_ci	mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
25398c2ecf20Sopenharmony_ci}
25408c2ecf20Sopenharmony_ci
25418c2ecf20Sopenharmony_cistatic bool tcp_too_many_orphans(int shift)
25428c2ecf20Sopenharmony_ci{
25438c2ecf20Sopenharmony_ci	return READ_ONCE(tcp_orphan_cache) << shift >
25448c2ecf20Sopenharmony_ci		READ_ONCE(sysctl_tcp_max_orphans);
25458c2ecf20Sopenharmony_ci}
25468c2ecf20Sopenharmony_ci
25478c2ecf20Sopenharmony_cibool tcp_check_oom(struct sock *sk, int shift)
25488c2ecf20Sopenharmony_ci{
25498c2ecf20Sopenharmony_ci	bool too_many_orphans, out_of_socket_memory;
25508c2ecf20Sopenharmony_ci
25518c2ecf20Sopenharmony_ci	too_many_orphans = tcp_too_many_orphans(shift);
25528c2ecf20Sopenharmony_ci	out_of_socket_memory = tcp_out_of_memory(sk);
25538c2ecf20Sopenharmony_ci
25548c2ecf20Sopenharmony_ci	if (too_many_orphans)
25558c2ecf20Sopenharmony_ci		net_info_ratelimited("too many orphaned sockets\n");
25568c2ecf20Sopenharmony_ci	if (out_of_socket_memory)
25578c2ecf20Sopenharmony_ci		net_info_ratelimited("out of memory -- consider tuning tcp_mem\n");
25588c2ecf20Sopenharmony_ci	return too_many_orphans || out_of_socket_memory;
25598c2ecf20Sopenharmony_ci}
25608c2ecf20Sopenharmony_ci
/*
 * __tcp_close - close-time processing of a TCP socket.
 * @sk:      socket being closed
 * @timeout: handed to sk_stream_wait_close()
 *
 * Marks both directions as shut down, throws away unread receive data and
 * then picks one of: disconnect (repair mode or zero linger), reset
 * (unread data was discarded) or orderly close with a FIN.  Afterwards the
 * socket is orphaned and, if it ended up in TCP_CLOSE, destroyed.
 *
 * tcp_close() calls this between lock_sock() and release_sock().  The
 * reference taken here with sock_hold() is not dropped in this function;
 * tcp_close() issues the sock_put() after it returns.
 */
void __tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*  We need to flush the recv. buffs.  We do this only on the
	 *  descriptor close, not protocol-sourced closes, because the
	 *  reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;

		/* A FIN takes one sequence number but carries no data byte. */
		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
			len--;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	sk_mem_reclaim(sk);

	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
	if (sk->sk_state == TCP_CLOSE)
		goto adjudge_to_death;

	/* As outlined in RFC 2525, section 2.17, we send a RST here because
	 * data was lost. To witness the awful effects of the old behavior of
	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
	 * GET in an FTP client, suspend the process, wait for the client to
	 * advertise a zero window, then kill -9 the FTP client, wheee...
	 * Note: timeout is always zero in such a case.
	 */
	if (unlikely(tcp_sk(sk)->repair)) {
		sk->sk_prot->disconnect(sk, 0);
	} else if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, sk->sk_allocation);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 */

		/* RED-PEN. Formally speaking, we have broken TCP state
		 * machine. State transitions:
		 *
		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
		 * TCP_SYN_RECV	-> TCP_FIN_WAIT1 (it is difficult)
		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
		 *
		 * are legal only when FIN has been sent (i.e. in window),
		 * rather than queued out of window. Purists blame.
		 *
		 * F.e. "RFC state" is ESTABLISHED,
		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
		 *
		 * The visible declinations are that sometimes
		 * we enter time-wait state, when it is not required really
		 * (harmless), do not send active resets, when they are
		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
		 * they look as CLOSING or LAST_ACK for Linux)
		 * Probably, I missed some more holelets.
		 * 						--ANK
		 * XXX (TFO) - To start off we don't support SYN+ACK+FIN
		 * in a single packet! (May consider it later but will
		 * probably need API support or TCP_CORK SYN-ACK until
		 * data is written and socket is closed.)
		 */
		tcp_send_fin(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Snapshot the state before the backlog is processed below, so a
	 * transition to TCP_CLOSE caused by that processing can be detected.
	 */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	local_bh_disable();
	bh_lock_sock(sk);
	/* remove backlog if any, without releasing ownership. */
	__release_sock(sk);

	/* Per-CPU orphan counter; tcp_orphan_count_sum() adds these up. */
	this_cpu_inc(tcp_orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	/*	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 1 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2	option.					--ANK
	 */

	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_sock *tp = tcp_sk(sk);
		if (tp->linger2 < 0) {
			/* Negative linger2: reset at once instead of waiting. */
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			__NET_INC_STATS(sock_net(sk),
					LINUX_MIB_TCPABORTONLINGER);
		} else {
			const int tmo = tcp_fin_time(sk);

			if (tmo > TCP_TIMEWAIT_LEN) {
				/* Arm the keepalive timer for the part of the
				 * timeout that exceeds TCP_TIMEWAIT_LEN.
				 */
				inet_csk_reset_keepalive_timer(sk,
						tmo - TCP_TIMEWAIT_LEN);
			} else {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	if (sk->sk_state != TCP_CLOSE) {
		sk_mem_reclaim(sk);
		if (tcp_check_oom(sk, 0)) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			__NET_INC_STATS(sock_net(sk),
					LINUX_MIB_TCPABORTONMEMORY);
		} else if (!check_net(sock_net(sk))) {
			/* Not possible to send reset; just close */
			tcp_set_state(sk, TCP_CLOSE);
		}
	}

	if (sk->sk_state == TCP_CLOSE) {
		struct request_sock *req;

		req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
						lockdep_sock_is_held(sk));
		/* We could get here with a non-NULL req if the socket is
		 * aborted (e.g., closed with unread data) before 3WHS
		 * finishes.
		 */
		if (req)
			reqsk_fastopen_remove(sk, req, false);
		inet_csk_destroy_sock(sk);
	}
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
}
27318c2ecf20Sopenharmony_ci
27328c2ecf20Sopenharmony_civoid tcp_close(struct sock *sk, long timeout)
27338c2ecf20Sopenharmony_ci{
27348c2ecf20Sopenharmony_ci	lock_sock(sk);
27358c2ecf20Sopenharmony_ci	__tcp_close(sk, timeout);
27368c2ecf20Sopenharmony_ci	release_sock(sk);
27378c2ecf20Sopenharmony_ci	if (!sk->sk_net_refcnt)
27388c2ecf20Sopenharmony_ci		inet_csk_clear_xmit_timers_sync(sk);
27398c2ecf20Sopenharmony_ci	sock_put(sk);
27408c2ecf20Sopenharmony_ci}
27418c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_close);
27428c2ecf20Sopenharmony_ci
27438c2ecf20Sopenharmony_ci/* These states need RST on ABORT according to RFC793 */
27448c2ecf20Sopenharmony_ci
27458c2ecf20Sopenharmony_cistatic inline bool tcp_need_reset(int state)
27468c2ecf20Sopenharmony_ci{
27478c2ecf20Sopenharmony_ci	return (1 << state) &
27488c2ecf20Sopenharmony_ci	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
27498c2ecf20Sopenharmony_ci		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
27508c2ecf20Sopenharmony_ci}
27518c2ecf20Sopenharmony_ci
27528c2ecf20Sopenharmony_cistatic void tcp_rtx_queue_purge(struct sock *sk)
27538c2ecf20Sopenharmony_ci{
27548c2ecf20Sopenharmony_ci	struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
27558c2ecf20Sopenharmony_ci
27568c2ecf20Sopenharmony_ci	tcp_sk(sk)->highest_sack = NULL;
27578c2ecf20Sopenharmony_ci	while (p) {
27588c2ecf20Sopenharmony_ci		struct sk_buff *skb = rb_to_skb(p);
27598c2ecf20Sopenharmony_ci
27608c2ecf20Sopenharmony_ci		p = rb_next(p);
27618c2ecf20Sopenharmony_ci		/* Since we are deleting whole queue, no need to
27628c2ecf20Sopenharmony_ci		 * list_del(&skb->tcp_tsorted_anchor)
27638c2ecf20Sopenharmony_ci		 */
27648c2ecf20Sopenharmony_ci		tcp_rtx_queue_unlink(skb, sk);
27658c2ecf20Sopenharmony_ci		sk_wmem_free_skb(sk, skb);
27668c2ecf20Sopenharmony_ci	}
27678c2ecf20Sopenharmony_ci}
27688c2ecf20Sopenharmony_ci
27698c2ecf20Sopenharmony_civoid tcp_write_queue_purge(struct sock *sk)
27708c2ecf20Sopenharmony_ci{
27718c2ecf20Sopenharmony_ci	struct sk_buff *skb;
27728c2ecf20Sopenharmony_ci
27738c2ecf20Sopenharmony_ci	tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
27748c2ecf20Sopenharmony_ci	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
27758c2ecf20Sopenharmony_ci		tcp_skb_tsorted_anchor_cleanup(skb);
27768c2ecf20Sopenharmony_ci		sk_wmem_free_skb(sk, skb);
27778c2ecf20Sopenharmony_ci	}
27788c2ecf20Sopenharmony_ci	tcp_rtx_queue_purge(sk);
27798c2ecf20Sopenharmony_ci	skb = sk->sk_tx_skb_cache;
27808c2ecf20Sopenharmony_ci	if (skb) {
27818c2ecf20Sopenharmony_ci		__kfree_skb(skb);
27828c2ecf20Sopenharmony_ci		sk->sk_tx_skb_cache = NULL;
27838c2ecf20Sopenharmony_ci	}
27848c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
27858c2ecf20Sopenharmony_ci	sk_mem_reclaim(sk);
27868c2ecf20Sopenharmony_ci	tcp_clear_all_retrans_hints(tcp_sk(sk));
27878c2ecf20Sopenharmony_ci	tcp_sk(sk)->packets_out = 0;
27888c2ecf20Sopenharmony_ci	inet_csk(sk)->icsk_backoff = 0;
27898c2ecf20Sopenharmony_ci}
27908c2ecf20Sopenharmony_ci
/*
 * tcp_disconnect - abort the connection and reset the socket for reuse.
 * @sk:    socket to disconnect
 * @flags: not referenced in this function
 *
 * Moves the socket to TCP_CLOSE (sending a RST where the previous state
 * requires one), purges the receive, write and out-of-order queues and
 * resets the per-connection TCP state to initial values.
 *
 * Return: -EBUSY when sk->sk_wait_pending is set, otherwise 0.
 */
int tcp_disconnect(struct sock *sk, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int old_state = sk->sk_state;
	u32 seq;

	/* Deny disconnect if other threads are blocked in sk_wait_event()
	 * or inet_wait_for_connect().
	 */
	if (sk->sk_wait_pending)
		return -EBUSY;

	if (old_state != TCP_CLOSE)
		tcp_set_state(sk, TCP_CLOSE);

	/* ABORT function of RFC793 */
	if (old_state == TCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (unlikely(tp->repair)) {
		/* Repair mode: no reset is sent, only the error is recorded. */
		sk->sk_err = ECONNABORTED;
	} else if (tcp_need_reset(old_state) ||
		   (tp->snd_nxt != tp->write_seq &&
		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
		/* The last check adjusts for discrepancy of Linux wrt. RFC
		 * states
		 */
		tcp_send_active_reset(sk, gfp_any());
		sk->sk_err = ECONNRESET;
	} else if (old_state == TCP_SYN_SENT)
		sk->sk_err = ECONNRESET;

	/* Stop timers and drop all queued receive data. */
	tcp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_rx_skb_cache) {
		__kfree_skb(sk->sk_rx_skb_cache);
		sk->sk_rx_skb_cache = NULL;
	}
	WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
	tp->urg_data = 0;
	tcp_write_queue_purge(sk);
	tcp_fastopen_active_disable_ofo_check(sk);
	skb_rbtree_purge(&tp->out_of_order_queue);

	inet->inet_dport = 0;

	/* Keep the source address when the user bound it explicitly. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	WRITE_ONCE(sk->sk_shutdown, 0);
	sock_reset_flag(sk, SOCK_DONE);
	tp->srtt_us = 0;
	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
	tp->rcv_rtt_last_tsecr = 0;

	/* Move write_seq forward by max_window + 2 and never leave it at 0.
	 * NOTE(review): presumably keeps a reused socket clear of the old
	 * connection's sequence space - confirm.
	 */
	seq = tp->write_seq + tp->max_window + 2;
	if (!seq)
		seq = 1;
	WRITE_ONCE(tp->write_seq, seq);

	/* Retransmission / probe timer state back to initial values. */
	icsk->icsk_backoff = 0;
	icsk->icsk_probes_out = 0;
	icsk->icsk_probes_tstamp = 0;
	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	icsk->icsk_rto_min = TCP_RTO_MIN;
	icsk->icsk_delack_max = TCP_DELACK_MAX;
#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
	/* NATA retry settings, only present with these config options. */
	icsk->nata_retries_enabled = 0;
	icsk->nata_retries_type = NATA_NA;
	icsk->nata_syn_rto = TCP_TIMEOUT_INIT;
	icsk->nata_data_rto = TCP_TIMEOUT_INIT;
	icsk->nata_data_retries = 0;
#endif
	/* Congestion control state. */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd = TCP_INIT_CWND;
	tp->snd_cwnd_cnt = 0;
	tp->is_cwnd_limited = 0;
	tp->max_packets_out = 0;
	tp->window_clamp = 0;
	tp->delivered = 0;
	tp->delivered_ce = 0;
	/* Let the congestion control module release its state before its
	 * private area is wiped.
	 */
	if (icsk->icsk_ca_ops->release)
		icsk->icsk_ca_ops->release(sk);
	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
	icsk->icsk_ca_initialized = 0;
	tcp_set_ca_state(sk, TCP_CA_Open);
	tp->is_sack_reneg = 0;
	tcp_clear_retrans(tp);
	tp->total_retrans = 0;
	inet_csk_delack_init(sk);
	/* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
	 * issue in __tcp_select_window()
	 */
	icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
	/* Drop cached routes (tx and rx). */
	__sk_dst_reset(sk);
	dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL));
	tcp_saved_syn_free(tp);
	/* Per-connection counters and statistics. */
	tp->compressed_ack = 0;
	tp->segs_in = 0;
	tp->segs_out = 0;
	tp->bytes_sent = 0;
	tp->bytes_acked = 0;
	tp->bytes_received = 0;
	tp->bytes_retrans = 0;
	tp->data_segs_in = 0;
	tp->data_segs_out = 0;
	tp->duplicate_sack[0].start_seq = 0;
	tp->duplicate_sack[0].end_seq = 0;
	tp->dsack_dups = 0;
	tp->reord_seen = 0;
	tp->retrans_out = 0;
	tp->sacked_out = 0;
	tp->tlp_high_seq = 0;
	tp->last_oow_ack_time = 0;
	/* There's a bubble in the pipe until at least the first ACK. */
	tp->app_limited = ~0U;
	tp->rate_app_limited = 1;
	tp->rack.mstamp = 0;
	tp->rack.advanced = 0;
	tp->rack.reo_wnd_steps = 1;
	tp->rack.last_delivered = 0;
	tp->rack.reo_wnd_persist = 0;
	tp->rack.dsack_seen = 0;
	tp->syn_data_acked = 0;
	/* These rx_opt fields were already zeroed by the memset of
	 * tp->rx_opt above; the explicit resets are kept as they are.
	 */
	tp->rx_opt.saw_tstamp = 0;
	tp->rx_opt.dsack = 0;
	tp->rx_opt.num_sacks = 0;
	tp->rcv_ooopack = 0;


	/* Clean up fastopen related fields */
	tcp_free_fastopen_req(tp);
	inet->defer_connect = 0;
	tp->fastopen_client_fail = 0;

	/* A socket that still has a local port must still have a bind
	 * bucket.
	 */
	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	/* Release the cached page fragment, if any. */
	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
		sk->sk_frag.offset = 0;
	}

	/* Invoke the socket's error-report callback. */
	sk->sk_error_report(sk);
	return 0;
}
EXPORT_SYMBOL(tcp_disconnect);
29408c2ecf20Sopenharmony_ci
29418c2ecf20Sopenharmony_cistatic inline bool tcp_can_repair_sock(const struct sock *sk)
29428c2ecf20Sopenharmony_ci{
29438c2ecf20Sopenharmony_ci	return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
29448c2ecf20Sopenharmony_ci		(sk->sk_state != TCP_LISTEN);
29458c2ecf20Sopenharmony_ci}
29468c2ecf20Sopenharmony_ci
29478c2ecf20Sopenharmony_cistatic int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
29488c2ecf20Sopenharmony_ci{
29498c2ecf20Sopenharmony_ci	struct tcp_repair_window opt;
29508c2ecf20Sopenharmony_ci
29518c2ecf20Sopenharmony_ci	if (!tp->repair)
29528c2ecf20Sopenharmony_ci		return -EPERM;
29538c2ecf20Sopenharmony_ci
29548c2ecf20Sopenharmony_ci	if (len != sizeof(opt))
29558c2ecf20Sopenharmony_ci		return -EINVAL;
29568c2ecf20Sopenharmony_ci
29578c2ecf20Sopenharmony_ci	if (copy_from_sockptr(&opt, optbuf, sizeof(opt)))
29588c2ecf20Sopenharmony_ci		return -EFAULT;
29598c2ecf20Sopenharmony_ci
29608c2ecf20Sopenharmony_ci	if (opt.max_window < opt.snd_wnd)
29618c2ecf20Sopenharmony_ci		return -EINVAL;
29628c2ecf20Sopenharmony_ci
29638c2ecf20Sopenharmony_ci	if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd))
29648c2ecf20Sopenharmony_ci		return -EINVAL;
29658c2ecf20Sopenharmony_ci
29668c2ecf20Sopenharmony_ci	if (after(opt.rcv_wup, tp->rcv_nxt))
29678c2ecf20Sopenharmony_ci		return -EINVAL;
29688c2ecf20Sopenharmony_ci
29698c2ecf20Sopenharmony_ci	tp->snd_wl1	= opt.snd_wl1;
29708c2ecf20Sopenharmony_ci	tp->snd_wnd	= opt.snd_wnd;
29718c2ecf20Sopenharmony_ci	tp->max_window	= opt.max_window;
29728c2ecf20Sopenharmony_ci
29738c2ecf20Sopenharmony_ci	tp->rcv_wnd	= opt.rcv_wnd;
29748c2ecf20Sopenharmony_ci	tp->rcv_wup	= opt.rcv_wup;
29758c2ecf20Sopenharmony_ci
29768c2ecf20Sopenharmony_ci	return 0;
29778c2ecf20Sopenharmony_ci}
29788c2ecf20Sopenharmony_ci
29798c2ecf20Sopenharmony_cistatic int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf,
29808c2ecf20Sopenharmony_ci		unsigned int len)
29818c2ecf20Sopenharmony_ci{
29828c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
29838c2ecf20Sopenharmony_ci	struct tcp_repair_opt opt;
29848c2ecf20Sopenharmony_ci	size_t offset = 0;
29858c2ecf20Sopenharmony_ci
29868c2ecf20Sopenharmony_ci	while (len >= sizeof(opt)) {
29878c2ecf20Sopenharmony_ci		if (copy_from_sockptr_offset(&opt, optbuf, offset, sizeof(opt)))
29888c2ecf20Sopenharmony_ci			return -EFAULT;
29898c2ecf20Sopenharmony_ci
29908c2ecf20Sopenharmony_ci		offset += sizeof(opt);
29918c2ecf20Sopenharmony_ci		len -= sizeof(opt);
29928c2ecf20Sopenharmony_ci
29938c2ecf20Sopenharmony_ci		switch (opt.opt_code) {
29948c2ecf20Sopenharmony_ci		case TCPOPT_MSS:
29958c2ecf20Sopenharmony_ci			tp->rx_opt.mss_clamp = opt.opt_val;
29968c2ecf20Sopenharmony_ci			tcp_mtup_init(sk);
29978c2ecf20Sopenharmony_ci			break;
29988c2ecf20Sopenharmony_ci		case TCPOPT_WINDOW:
29998c2ecf20Sopenharmony_ci			{
30008c2ecf20Sopenharmony_ci				u16 snd_wscale = opt.opt_val & 0xFFFF;
30018c2ecf20Sopenharmony_ci				u16 rcv_wscale = opt.opt_val >> 16;
30028c2ecf20Sopenharmony_ci
30038c2ecf20Sopenharmony_ci				if (snd_wscale > TCP_MAX_WSCALE || rcv_wscale > TCP_MAX_WSCALE)
30048c2ecf20Sopenharmony_ci					return -EFBIG;
30058c2ecf20Sopenharmony_ci
30068c2ecf20Sopenharmony_ci				tp->rx_opt.snd_wscale = snd_wscale;
30078c2ecf20Sopenharmony_ci				tp->rx_opt.rcv_wscale = rcv_wscale;
30088c2ecf20Sopenharmony_ci				tp->rx_opt.wscale_ok = 1;
30098c2ecf20Sopenharmony_ci			}
30108c2ecf20Sopenharmony_ci			break;
30118c2ecf20Sopenharmony_ci		case TCPOPT_SACK_PERM:
30128c2ecf20Sopenharmony_ci			if (opt.opt_val != 0)
30138c2ecf20Sopenharmony_ci				return -EINVAL;
30148c2ecf20Sopenharmony_ci
30158c2ecf20Sopenharmony_ci			tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
30168c2ecf20Sopenharmony_ci			break;
30178c2ecf20Sopenharmony_ci		case TCPOPT_TIMESTAMP:
30188c2ecf20Sopenharmony_ci			if (opt.opt_val != 0)
30198c2ecf20Sopenharmony_ci				return -EINVAL;
30208c2ecf20Sopenharmony_ci
30218c2ecf20Sopenharmony_ci			tp->rx_opt.tstamp_ok = 1;
30228c2ecf20Sopenharmony_ci			break;
30238c2ecf20Sopenharmony_ci		}
30248c2ecf20Sopenharmony_ci	}
30258c2ecf20Sopenharmony_ci
30268c2ecf20Sopenharmony_ci	return 0;
30278c2ecf20Sopenharmony_ci}
30288c2ecf20Sopenharmony_ci
30298c2ecf20Sopenharmony_ciDEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
30308c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_tx_delay_enabled);
30318c2ecf20Sopenharmony_ci
30328c2ecf20Sopenharmony_cistatic void tcp_enable_tx_delay(void)
30338c2ecf20Sopenharmony_ci{
30348c2ecf20Sopenharmony_ci	if (!static_branch_unlikely(&tcp_tx_delay_enabled)) {
30358c2ecf20Sopenharmony_ci		static int __tcp_tx_delay_enabled = 0;
30368c2ecf20Sopenharmony_ci
30378c2ecf20Sopenharmony_ci		if (cmpxchg(&__tcp_tx_delay_enabled, 0, 1) == 0) {
30388c2ecf20Sopenharmony_ci			static_branch_enable(&tcp_tx_delay_enabled);
30398c2ecf20Sopenharmony_ci			pr_info("TCP_TX_DELAY enabled\n");
30408c2ecf20Sopenharmony_ci		}
30418c2ecf20Sopenharmony_ci	}
30428c2ecf20Sopenharmony_ci}
30438c2ecf20Sopenharmony_ci
30448c2ecf20Sopenharmony_ci/* When set indicates to always queue non-full frames.  Later the user clears
30458c2ecf20Sopenharmony_ci * this option and we transmit any pending partial frames in the queue.  This is
30468c2ecf20Sopenharmony_ci * meant to be used alongside sendfile() to get properly filled frames when the
30478c2ecf20Sopenharmony_ci * user (for example) must write out headers with a write() call first and then
30488c2ecf20Sopenharmony_ci * use sendfile to send out the data parts.
30498c2ecf20Sopenharmony_ci *
30508c2ecf20Sopenharmony_ci * TCP_CORK can be set together with TCP_NODELAY and it is stronger than
30518c2ecf20Sopenharmony_ci * TCP_NODELAY.
30528c2ecf20Sopenharmony_ci */
30538c2ecf20Sopenharmony_cistatic void __tcp_sock_set_cork(struct sock *sk, bool on)
30548c2ecf20Sopenharmony_ci{
30558c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
30568c2ecf20Sopenharmony_ci
30578c2ecf20Sopenharmony_ci	if (on) {
30588c2ecf20Sopenharmony_ci		tp->nonagle |= TCP_NAGLE_CORK;
30598c2ecf20Sopenharmony_ci	} else {
30608c2ecf20Sopenharmony_ci		tp->nonagle &= ~TCP_NAGLE_CORK;
30618c2ecf20Sopenharmony_ci		if (tp->nonagle & TCP_NAGLE_OFF)
30628c2ecf20Sopenharmony_ci			tp->nonagle |= TCP_NAGLE_PUSH;
30638c2ecf20Sopenharmony_ci		tcp_push_pending_frames(sk);
30648c2ecf20Sopenharmony_ci	}
30658c2ecf20Sopenharmony_ci}
30668c2ecf20Sopenharmony_ci
30678c2ecf20Sopenharmony_civoid tcp_sock_set_cork(struct sock *sk, bool on)
30688c2ecf20Sopenharmony_ci{
30698c2ecf20Sopenharmony_ci	lock_sock(sk);
30708c2ecf20Sopenharmony_ci	__tcp_sock_set_cork(sk, on);
30718c2ecf20Sopenharmony_ci	release_sock(sk);
30728c2ecf20Sopenharmony_ci}
30738c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_cork);
30748c2ecf20Sopenharmony_ci
30758c2ecf20Sopenharmony_ci/* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is
30768c2ecf20Sopenharmony_ci * remembered, but it is not activated until cork is cleared.
30778c2ecf20Sopenharmony_ci *
30788c2ecf20Sopenharmony_ci * However, when TCP_NODELAY is set we make an explicit push, which overrides
30798c2ecf20Sopenharmony_ci * even TCP_CORK for currently queued segments.
30808c2ecf20Sopenharmony_ci */
30818c2ecf20Sopenharmony_cistatic void __tcp_sock_set_nodelay(struct sock *sk, bool on)
30828c2ecf20Sopenharmony_ci{
30838c2ecf20Sopenharmony_ci	if (on) {
30848c2ecf20Sopenharmony_ci		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
30858c2ecf20Sopenharmony_ci		tcp_push_pending_frames(sk);
30868c2ecf20Sopenharmony_ci	} else {
30878c2ecf20Sopenharmony_ci		tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF;
30888c2ecf20Sopenharmony_ci	}
30898c2ecf20Sopenharmony_ci}
30908c2ecf20Sopenharmony_ci
30918c2ecf20Sopenharmony_civoid tcp_sock_set_nodelay(struct sock *sk)
30928c2ecf20Sopenharmony_ci{
30938c2ecf20Sopenharmony_ci	lock_sock(sk);
30948c2ecf20Sopenharmony_ci	__tcp_sock_set_nodelay(sk, true);
30958c2ecf20Sopenharmony_ci	release_sock(sk);
30968c2ecf20Sopenharmony_ci}
30978c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_nodelay);
30988c2ecf20Sopenharmony_ci
30998c2ecf20Sopenharmony_cistatic void __tcp_sock_set_quickack(struct sock *sk, int val)
31008c2ecf20Sopenharmony_ci{
31018c2ecf20Sopenharmony_ci	if (!val) {
31028c2ecf20Sopenharmony_ci		inet_csk_enter_pingpong_mode(sk);
31038c2ecf20Sopenharmony_ci		return;
31048c2ecf20Sopenharmony_ci	}
31058c2ecf20Sopenharmony_ci
31068c2ecf20Sopenharmony_ci	inet_csk_exit_pingpong_mode(sk);
31078c2ecf20Sopenharmony_ci	if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
31088c2ecf20Sopenharmony_ci	    inet_csk_ack_scheduled(sk)) {
31098c2ecf20Sopenharmony_ci		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED;
31108c2ecf20Sopenharmony_ci		tcp_cleanup_rbuf(sk, 1);
31118c2ecf20Sopenharmony_ci		if (!(val & 1))
31128c2ecf20Sopenharmony_ci			inet_csk_enter_pingpong_mode(sk);
31138c2ecf20Sopenharmony_ci	}
31148c2ecf20Sopenharmony_ci}
31158c2ecf20Sopenharmony_ci
/* Exported TCP_QUICKACK setter: runs __tcp_sock_set_quickack() under the
 * socket lock.
 */
void tcp_sock_set_quickack(struct sock *sk, int val)
{
	lock_sock(sk);
	__tcp_sock_set_quickack(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(tcp_sock_set_quickack);
31238c2ecf20Sopenharmony_ci
31248c2ecf20Sopenharmony_ciint tcp_sock_set_syncnt(struct sock *sk, int val)
31258c2ecf20Sopenharmony_ci{
31268c2ecf20Sopenharmony_ci	if (val < 1 || val > MAX_TCP_SYNCNT)
31278c2ecf20Sopenharmony_ci		return -EINVAL;
31288c2ecf20Sopenharmony_ci
31298c2ecf20Sopenharmony_ci	lock_sock(sk);
31308c2ecf20Sopenharmony_ci	WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
31318c2ecf20Sopenharmony_ci	release_sock(sk);
31328c2ecf20Sopenharmony_ci	return 0;
31338c2ecf20Sopenharmony_ci}
31348c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_syncnt);
31358c2ecf20Sopenharmony_ci
31368c2ecf20Sopenharmony_civoid tcp_sock_set_user_timeout(struct sock *sk, u32 val)
31378c2ecf20Sopenharmony_ci{
31388c2ecf20Sopenharmony_ci	lock_sock(sk);
31398c2ecf20Sopenharmony_ci	WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
31408c2ecf20Sopenharmony_ci	release_sock(sk);
31418c2ecf20Sopenharmony_ci}
31428c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_user_timeout);
31438c2ecf20Sopenharmony_ci
31448c2ecf20Sopenharmony_ciint tcp_sock_set_keepidle_locked(struct sock *sk, int val)
31458c2ecf20Sopenharmony_ci{
31468c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
31478c2ecf20Sopenharmony_ci
31488c2ecf20Sopenharmony_ci	if (val < 1 || val > MAX_TCP_KEEPIDLE)
31498c2ecf20Sopenharmony_ci		return -EINVAL;
31508c2ecf20Sopenharmony_ci
31518c2ecf20Sopenharmony_ci	/* Paired with WRITE_ONCE() in keepalive_time_when() */
31528c2ecf20Sopenharmony_ci	WRITE_ONCE(tp->keepalive_time, val * HZ);
31538c2ecf20Sopenharmony_ci	if (sock_flag(sk, SOCK_KEEPOPEN) &&
31548c2ecf20Sopenharmony_ci	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
31558c2ecf20Sopenharmony_ci		u32 elapsed = keepalive_time_elapsed(tp);
31568c2ecf20Sopenharmony_ci
31578c2ecf20Sopenharmony_ci		if (tp->keepalive_time > elapsed)
31588c2ecf20Sopenharmony_ci			elapsed = tp->keepalive_time - elapsed;
31598c2ecf20Sopenharmony_ci		else
31608c2ecf20Sopenharmony_ci			elapsed = 0;
31618c2ecf20Sopenharmony_ci		inet_csk_reset_keepalive_timer(sk, elapsed);
31628c2ecf20Sopenharmony_ci	}
31638c2ecf20Sopenharmony_ci
31648c2ecf20Sopenharmony_ci	return 0;
31658c2ecf20Sopenharmony_ci}
31668c2ecf20Sopenharmony_ci
/* Exported TCP_KEEPIDLE setter: takes the socket lock and forwards to
 * tcp_sock_set_keepidle_locked(), returning its result.
 */
int tcp_sock_set_keepidle(struct sock *sk, int val)
{
	int ret;

	lock_sock(sk);
	ret = tcp_sock_set_keepidle_locked(sk, val);
	release_sock(sk);

	return ret;
}
EXPORT_SYMBOL(tcp_sock_set_keepidle);
31778c2ecf20Sopenharmony_ci
31788c2ecf20Sopenharmony_ciint tcp_sock_set_keepintvl(struct sock *sk, int val)
31798c2ecf20Sopenharmony_ci{
31808c2ecf20Sopenharmony_ci	if (val < 1 || val > MAX_TCP_KEEPINTVL)
31818c2ecf20Sopenharmony_ci		return -EINVAL;
31828c2ecf20Sopenharmony_ci
31838c2ecf20Sopenharmony_ci	lock_sock(sk);
31848c2ecf20Sopenharmony_ci	WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
31858c2ecf20Sopenharmony_ci	release_sock(sk);
31868c2ecf20Sopenharmony_ci	return 0;
31878c2ecf20Sopenharmony_ci}
31888c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_keepintvl);
31898c2ecf20Sopenharmony_ci
31908c2ecf20Sopenharmony_ciint tcp_sock_set_keepcnt(struct sock *sk, int val)
31918c2ecf20Sopenharmony_ci{
31928c2ecf20Sopenharmony_ci	if (val < 1 || val > MAX_TCP_KEEPCNT)
31938c2ecf20Sopenharmony_ci		return -EINVAL;
31948c2ecf20Sopenharmony_ci
31958c2ecf20Sopenharmony_ci	lock_sock(sk);
31968c2ecf20Sopenharmony_ci	/* Paired with READ_ONCE() in keepalive_probes() */
31978c2ecf20Sopenharmony_ci	WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
31988c2ecf20Sopenharmony_ci	release_sock(sk);
31998c2ecf20Sopenharmony_ci	return 0;
32008c2ecf20Sopenharmony_ci}
32018c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_sock_set_keepcnt);
32028c2ecf20Sopenharmony_ci
32038c2ecf20Sopenharmony_ci/*
32048c2ecf20Sopenharmony_ci *	Socket option code for TCP.
32058c2ecf20Sopenharmony_ci */
32068c2ecf20Sopenharmony_cistatic int do_tcp_setsockopt(struct sock *sk, int level, int optname,
32078c2ecf20Sopenharmony_ci		sockptr_t optval, unsigned int optlen)
32088c2ecf20Sopenharmony_ci{
32098c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
32108c2ecf20Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
32118c2ecf20Sopenharmony_ci	struct net *net = sock_net(sk);
32128c2ecf20Sopenharmony_ci	int val;
32138c2ecf20Sopenharmony_ci	int err = 0;
32148c2ecf20Sopenharmony_ci
32158c2ecf20Sopenharmony_ci	/* These are data/string values, all the others are ints */
32168c2ecf20Sopenharmony_ci	switch (optname) {
32178c2ecf20Sopenharmony_ci	case TCP_CONGESTION: {
32188c2ecf20Sopenharmony_ci		char name[TCP_CA_NAME_MAX];
32198c2ecf20Sopenharmony_ci
32208c2ecf20Sopenharmony_ci		if (optlen < 1)
32218c2ecf20Sopenharmony_ci			return -EINVAL;
32228c2ecf20Sopenharmony_ci
32238c2ecf20Sopenharmony_ci		val = strncpy_from_sockptr(name, optval,
32248c2ecf20Sopenharmony_ci					min_t(long, TCP_CA_NAME_MAX-1, optlen));
32258c2ecf20Sopenharmony_ci		if (val < 0)
32268c2ecf20Sopenharmony_ci			return -EFAULT;
32278c2ecf20Sopenharmony_ci		name[val] = 0;
32288c2ecf20Sopenharmony_ci
32298c2ecf20Sopenharmony_ci		lock_sock(sk);
32308c2ecf20Sopenharmony_ci		err = tcp_set_congestion_control(sk, name, true,
32318c2ecf20Sopenharmony_ci						 ns_capable(sock_net(sk)->user_ns,
32328c2ecf20Sopenharmony_ci							    CAP_NET_ADMIN));
32338c2ecf20Sopenharmony_ci		release_sock(sk);
32348c2ecf20Sopenharmony_ci		return err;
32358c2ecf20Sopenharmony_ci	}
32368c2ecf20Sopenharmony_ci	case TCP_ULP: {
32378c2ecf20Sopenharmony_ci		char name[TCP_ULP_NAME_MAX];
32388c2ecf20Sopenharmony_ci
32398c2ecf20Sopenharmony_ci		if (optlen < 1)
32408c2ecf20Sopenharmony_ci			return -EINVAL;
32418c2ecf20Sopenharmony_ci
32428c2ecf20Sopenharmony_ci		val = strncpy_from_sockptr(name, optval,
32438c2ecf20Sopenharmony_ci					min_t(long, TCP_ULP_NAME_MAX - 1,
32448c2ecf20Sopenharmony_ci					      optlen));
32458c2ecf20Sopenharmony_ci		if (val < 0)
32468c2ecf20Sopenharmony_ci			return -EFAULT;
32478c2ecf20Sopenharmony_ci		name[val] = 0;
32488c2ecf20Sopenharmony_ci
32498c2ecf20Sopenharmony_ci		lock_sock(sk);
32508c2ecf20Sopenharmony_ci		err = tcp_set_ulp(sk, name);
32518c2ecf20Sopenharmony_ci		release_sock(sk);
32528c2ecf20Sopenharmony_ci		return err;
32538c2ecf20Sopenharmony_ci	}
32548c2ecf20Sopenharmony_ci	case TCP_FASTOPEN_KEY: {
32558c2ecf20Sopenharmony_ci		__u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
32568c2ecf20Sopenharmony_ci		__u8 *backup_key = NULL;
32578c2ecf20Sopenharmony_ci
32588c2ecf20Sopenharmony_ci		/* Allow a backup key as well to facilitate key rotation
32598c2ecf20Sopenharmony_ci		 * First key is the active one.
32608c2ecf20Sopenharmony_ci		 */
32618c2ecf20Sopenharmony_ci		if (optlen != TCP_FASTOPEN_KEY_LENGTH &&
32628c2ecf20Sopenharmony_ci		    optlen != TCP_FASTOPEN_KEY_BUF_LENGTH)
32638c2ecf20Sopenharmony_ci			return -EINVAL;
32648c2ecf20Sopenharmony_ci
32658c2ecf20Sopenharmony_ci		if (copy_from_sockptr(key, optval, optlen))
32668c2ecf20Sopenharmony_ci			return -EFAULT;
32678c2ecf20Sopenharmony_ci
32688c2ecf20Sopenharmony_ci		if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH)
32698c2ecf20Sopenharmony_ci			backup_key = key + TCP_FASTOPEN_KEY_LENGTH;
32708c2ecf20Sopenharmony_ci
32718c2ecf20Sopenharmony_ci		return tcp_fastopen_reset_cipher(net, sk, key, backup_key);
32728c2ecf20Sopenharmony_ci	}
32738c2ecf20Sopenharmony_ci	default:
32748c2ecf20Sopenharmony_ci		/* fallthru */
32758c2ecf20Sopenharmony_ci		break;
32768c2ecf20Sopenharmony_ci	}
32778c2ecf20Sopenharmony_ci
32788c2ecf20Sopenharmony_ci	if (optlen < sizeof(int))
32798c2ecf20Sopenharmony_ci		return -EINVAL;
32808c2ecf20Sopenharmony_ci
32818c2ecf20Sopenharmony_ci	if (copy_from_sockptr(&val, optval, sizeof(val)))
32828c2ecf20Sopenharmony_ci		return -EFAULT;
32838c2ecf20Sopenharmony_ci
32848c2ecf20Sopenharmony_ci	lock_sock(sk);
32858c2ecf20Sopenharmony_ci
32868c2ecf20Sopenharmony_ci	switch (optname) {
32878c2ecf20Sopenharmony_ci	case TCP_MAXSEG:
32888c2ecf20Sopenharmony_ci		/* Values greater than interface MTU won't take effect. However
32898c2ecf20Sopenharmony_ci		 * at the point when this call is done we typically don't yet
32908c2ecf20Sopenharmony_ci		 * know which interface is going to be used
32918c2ecf20Sopenharmony_ci		 */
32928c2ecf20Sopenharmony_ci		if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
32938c2ecf20Sopenharmony_ci			err = -EINVAL;
32948c2ecf20Sopenharmony_ci			break;
32958c2ecf20Sopenharmony_ci		}
32968c2ecf20Sopenharmony_ci		tp->rx_opt.user_mss = val;
32978c2ecf20Sopenharmony_ci		break;
32988c2ecf20Sopenharmony_ci
32998c2ecf20Sopenharmony_ci	case TCP_NODELAY:
33008c2ecf20Sopenharmony_ci		__tcp_sock_set_nodelay(sk, val);
33018c2ecf20Sopenharmony_ci		break;
33028c2ecf20Sopenharmony_ci
33038c2ecf20Sopenharmony_ci	case TCP_THIN_LINEAR_TIMEOUTS:
33048c2ecf20Sopenharmony_ci		if (val < 0 || val > 1)
33058c2ecf20Sopenharmony_ci			err = -EINVAL;
33068c2ecf20Sopenharmony_ci		else
33078c2ecf20Sopenharmony_ci			tp->thin_lto = val;
33088c2ecf20Sopenharmony_ci		break;
33098c2ecf20Sopenharmony_ci
33108c2ecf20Sopenharmony_ci	case TCP_THIN_DUPACK:
33118c2ecf20Sopenharmony_ci		if (val < 0 || val > 1)
33128c2ecf20Sopenharmony_ci			err = -EINVAL;
33138c2ecf20Sopenharmony_ci		break;
33148c2ecf20Sopenharmony_ci
33158c2ecf20Sopenharmony_ci	case TCP_REPAIR:
33168c2ecf20Sopenharmony_ci		if (!tcp_can_repair_sock(sk))
33178c2ecf20Sopenharmony_ci			err = -EPERM;
33188c2ecf20Sopenharmony_ci		else if (val == TCP_REPAIR_ON) {
33198c2ecf20Sopenharmony_ci			tp->repair = 1;
33208c2ecf20Sopenharmony_ci			sk->sk_reuse = SK_FORCE_REUSE;
33218c2ecf20Sopenharmony_ci			tp->repair_queue = TCP_NO_QUEUE;
33228c2ecf20Sopenharmony_ci		} else if (val == TCP_REPAIR_OFF) {
33238c2ecf20Sopenharmony_ci			tp->repair = 0;
33248c2ecf20Sopenharmony_ci			sk->sk_reuse = SK_NO_REUSE;
33258c2ecf20Sopenharmony_ci			tcp_send_window_probe(sk);
33268c2ecf20Sopenharmony_ci		} else if (val == TCP_REPAIR_OFF_NO_WP) {
33278c2ecf20Sopenharmony_ci			tp->repair = 0;
33288c2ecf20Sopenharmony_ci			sk->sk_reuse = SK_NO_REUSE;
33298c2ecf20Sopenharmony_ci		} else
33308c2ecf20Sopenharmony_ci			err = -EINVAL;
33318c2ecf20Sopenharmony_ci
33328c2ecf20Sopenharmony_ci		break;
33338c2ecf20Sopenharmony_ci
33348c2ecf20Sopenharmony_ci	case TCP_REPAIR_QUEUE:
33358c2ecf20Sopenharmony_ci		if (!tp->repair)
33368c2ecf20Sopenharmony_ci			err = -EPERM;
33378c2ecf20Sopenharmony_ci		else if ((unsigned int)val < TCP_QUEUES_NR)
33388c2ecf20Sopenharmony_ci			tp->repair_queue = val;
33398c2ecf20Sopenharmony_ci		else
33408c2ecf20Sopenharmony_ci			err = -EINVAL;
33418c2ecf20Sopenharmony_ci		break;
33428c2ecf20Sopenharmony_ci
33438c2ecf20Sopenharmony_ci	case TCP_QUEUE_SEQ:
33448c2ecf20Sopenharmony_ci		if (sk->sk_state != TCP_CLOSE) {
33458c2ecf20Sopenharmony_ci			err = -EPERM;
33468c2ecf20Sopenharmony_ci		} else if (tp->repair_queue == TCP_SEND_QUEUE) {
33478c2ecf20Sopenharmony_ci			if (!tcp_rtx_queue_empty(sk))
33488c2ecf20Sopenharmony_ci				err = -EPERM;
33498c2ecf20Sopenharmony_ci			else
33508c2ecf20Sopenharmony_ci				WRITE_ONCE(tp->write_seq, val);
33518c2ecf20Sopenharmony_ci		} else if (tp->repair_queue == TCP_RECV_QUEUE) {
33528c2ecf20Sopenharmony_ci			if (tp->rcv_nxt != tp->copied_seq) {
33538c2ecf20Sopenharmony_ci				err = -EPERM;
33548c2ecf20Sopenharmony_ci			} else {
33558c2ecf20Sopenharmony_ci				WRITE_ONCE(tp->rcv_nxt, val);
33568c2ecf20Sopenharmony_ci				WRITE_ONCE(tp->copied_seq, val);
33578c2ecf20Sopenharmony_ci			}
33588c2ecf20Sopenharmony_ci		} else {
33598c2ecf20Sopenharmony_ci			err = -EINVAL;
33608c2ecf20Sopenharmony_ci		}
33618c2ecf20Sopenharmony_ci		break;
33628c2ecf20Sopenharmony_ci
33638c2ecf20Sopenharmony_ci	case TCP_REPAIR_OPTIONS:
33648c2ecf20Sopenharmony_ci		if (!tp->repair)
33658c2ecf20Sopenharmony_ci			err = -EINVAL;
33668c2ecf20Sopenharmony_ci		else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent)
33678c2ecf20Sopenharmony_ci			err = tcp_repair_options_est(sk, optval, optlen);
33688c2ecf20Sopenharmony_ci		else
33698c2ecf20Sopenharmony_ci			err = -EPERM;
33708c2ecf20Sopenharmony_ci		break;
33718c2ecf20Sopenharmony_ci
33728c2ecf20Sopenharmony_ci	case TCP_CORK:
33738c2ecf20Sopenharmony_ci		__tcp_sock_set_cork(sk, val);
33748c2ecf20Sopenharmony_ci		break;
33758c2ecf20Sopenharmony_ci
33768c2ecf20Sopenharmony_ci	case TCP_KEEPIDLE:
33778c2ecf20Sopenharmony_ci		err = tcp_sock_set_keepidle_locked(sk, val);
33788c2ecf20Sopenharmony_ci		break;
33798c2ecf20Sopenharmony_ci	case TCP_KEEPINTVL:
33808c2ecf20Sopenharmony_ci		if (val < 1 || val > MAX_TCP_KEEPINTVL)
33818c2ecf20Sopenharmony_ci			err = -EINVAL;
33828c2ecf20Sopenharmony_ci		else
33838c2ecf20Sopenharmony_ci			WRITE_ONCE(tp->keepalive_intvl, val * HZ);
33848c2ecf20Sopenharmony_ci		break;
33858c2ecf20Sopenharmony_ci	case TCP_KEEPCNT:
33868c2ecf20Sopenharmony_ci		if (val < 1 || val > MAX_TCP_KEEPCNT)
33878c2ecf20Sopenharmony_ci			err = -EINVAL;
33888c2ecf20Sopenharmony_ci		else
33898c2ecf20Sopenharmony_ci			WRITE_ONCE(tp->keepalive_probes, val);
33908c2ecf20Sopenharmony_ci		break;
33918c2ecf20Sopenharmony_ci	case TCP_SYNCNT:
33928c2ecf20Sopenharmony_ci		if (val < 1 || val > MAX_TCP_SYNCNT)
33938c2ecf20Sopenharmony_ci			err = -EINVAL;
33948c2ecf20Sopenharmony_ci		else
33958c2ecf20Sopenharmony_ci			WRITE_ONCE(icsk->icsk_syn_retries, val);
33968c2ecf20Sopenharmony_ci		break;
33978c2ecf20Sopenharmony_ci
33988c2ecf20Sopenharmony_ci	case TCP_SAVE_SYN:
33998c2ecf20Sopenharmony_ci		/* 0: disable, 1: enable, 2: start from ether_header */
34008c2ecf20Sopenharmony_ci		if (val < 0 || val > 2)
34018c2ecf20Sopenharmony_ci			err = -EINVAL;
34028c2ecf20Sopenharmony_ci		else
34038c2ecf20Sopenharmony_ci			tp->save_syn = val;
34048c2ecf20Sopenharmony_ci		break;
34058c2ecf20Sopenharmony_ci
34068c2ecf20Sopenharmony_ci	case TCP_LINGER2:
34078c2ecf20Sopenharmony_ci		if (val < 0)
34088c2ecf20Sopenharmony_ci			WRITE_ONCE(tp->linger2, -1);
34098c2ecf20Sopenharmony_ci		else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
34108c2ecf20Sopenharmony_ci			WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
34118c2ecf20Sopenharmony_ci		else
34128c2ecf20Sopenharmony_ci			WRITE_ONCE(tp->linger2, val * HZ);
34138c2ecf20Sopenharmony_ci		break;
34148c2ecf20Sopenharmony_ci
34158c2ecf20Sopenharmony_ci	case TCP_DEFER_ACCEPT:
34168c2ecf20Sopenharmony_ci		/* Translate value in seconds to number of retransmits */
34178c2ecf20Sopenharmony_ci		WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
34188c2ecf20Sopenharmony_ci			   secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
34198c2ecf20Sopenharmony_ci					   TCP_RTO_MAX / HZ));
34208c2ecf20Sopenharmony_ci		break;
34218c2ecf20Sopenharmony_ci
34228c2ecf20Sopenharmony_ci	case TCP_WINDOW_CLAMP:
34238c2ecf20Sopenharmony_ci		if (!val) {
34248c2ecf20Sopenharmony_ci			if (sk->sk_state != TCP_CLOSE) {
34258c2ecf20Sopenharmony_ci				err = -EINVAL;
34268c2ecf20Sopenharmony_ci				break;
34278c2ecf20Sopenharmony_ci			}
34288c2ecf20Sopenharmony_ci			tp->window_clamp = 0;
34298c2ecf20Sopenharmony_ci		} else
34308c2ecf20Sopenharmony_ci			tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
34318c2ecf20Sopenharmony_ci						SOCK_MIN_RCVBUF / 2 : val;
34328c2ecf20Sopenharmony_ci		break;
34338c2ecf20Sopenharmony_ci
34348c2ecf20Sopenharmony_ci	case TCP_QUICKACK:
34358c2ecf20Sopenharmony_ci		__tcp_sock_set_quickack(sk, val);
34368c2ecf20Sopenharmony_ci		break;
34378c2ecf20Sopenharmony_ci
34388c2ecf20Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
34398c2ecf20Sopenharmony_ci	case TCP_MD5SIG:
34408c2ecf20Sopenharmony_ci	case TCP_MD5SIG_EXT:
34418c2ecf20Sopenharmony_ci		err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
34428c2ecf20Sopenharmony_ci		break;
34438c2ecf20Sopenharmony_ci#endif
34448c2ecf20Sopenharmony_ci	case TCP_USER_TIMEOUT:
34458c2ecf20Sopenharmony_ci		/* Cap the max time in ms TCP will retry or probe the window
34468c2ecf20Sopenharmony_ci		 * before giving up and aborting (ETIMEDOUT) a connection.
34478c2ecf20Sopenharmony_ci		 */
34488c2ecf20Sopenharmony_ci		if (val < 0)
34498c2ecf20Sopenharmony_ci			err = -EINVAL;
34508c2ecf20Sopenharmony_ci		else
34518c2ecf20Sopenharmony_ci			WRITE_ONCE(icsk->icsk_user_timeout, val);
34528c2ecf20Sopenharmony_ci		break;
34538c2ecf20Sopenharmony_ci
34548c2ecf20Sopenharmony_ci	case TCP_FASTOPEN:
34558c2ecf20Sopenharmony_ci		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
34568c2ecf20Sopenharmony_ci		    TCPF_LISTEN))) {
34578c2ecf20Sopenharmony_ci			tcp_fastopen_init_key_once(net);
34588c2ecf20Sopenharmony_ci
34598c2ecf20Sopenharmony_ci			fastopen_queue_tune(sk, val);
34608c2ecf20Sopenharmony_ci		} else {
34618c2ecf20Sopenharmony_ci			err = -EINVAL;
34628c2ecf20Sopenharmony_ci		}
34638c2ecf20Sopenharmony_ci		break;
34648c2ecf20Sopenharmony_ci	case TCP_FASTOPEN_CONNECT:
34658c2ecf20Sopenharmony_ci		if (val > 1 || val < 0) {
34668c2ecf20Sopenharmony_ci			err = -EINVAL;
34678c2ecf20Sopenharmony_ci		} else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
34688c2ecf20Sopenharmony_ci			   TFO_CLIENT_ENABLE) {
34698c2ecf20Sopenharmony_ci			if (sk->sk_state == TCP_CLOSE)
34708c2ecf20Sopenharmony_ci				tp->fastopen_connect = val;
34718c2ecf20Sopenharmony_ci			else
34728c2ecf20Sopenharmony_ci				err = -EINVAL;
34738c2ecf20Sopenharmony_ci		} else {
34748c2ecf20Sopenharmony_ci			err = -EOPNOTSUPP;
34758c2ecf20Sopenharmony_ci		}
34768c2ecf20Sopenharmony_ci		break;
34778c2ecf20Sopenharmony_ci	case TCP_FASTOPEN_NO_COOKIE:
34788c2ecf20Sopenharmony_ci		if (val > 1 || val < 0)
34798c2ecf20Sopenharmony_ci			err = -EINVAL;
34808c2ecf20Sopenharmony_ci		else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
34818c2ecf20Sopenharmony_ci			err = -EINVAL;
34828c2ecf20Sopenharmony_ci		else
34838c2ecf20Sopenharmony_ci			tp->fastopen_no_cookie = val;
34848c2ecf20Sopenharmony_ci		break;
34858c2ecf20Sopenharmony_ci	case TCP_TIMESTAMP:
34868c2ecf20Sopenharmony_ci		if (!tp->repair)
34878c2ecf20Sopenharmony_ci			err = -EPERM;
34888c2ecf20Sopenharmony_ci		else
34898c2ecf20Sopenharmony_ci			tp->tsoffset = val - tcp_time_stamp_raw();
34908c2ecf20Sopenharmony_ci		break;
34918c2ecf20Sopenharmony_ci	case TCP_REPAIR_WINDOW:
34928c2ecf20Sopenharmony_ci		err = tcp_repair_set_window(tp, optval, optlen);
34938c2ecf20Sopenharmony_ci		break;
34948c2ecf20Sopenharmony_ci	case TCP_NOTSENT_LOWAT:
34958c2ecf20Sopenharmony_ci		WRITE_ONCE(tp->notsent_lowat, val);
34968c2ecf20Sopenharmony_ci		sk->sk_write_space(sk);
34978c2ecf20Sopenharmony_ci		break;
34988c2ecf20Sopenharmony_ci	case TCP_INQ:
34998c2ecf20Sopenharmony_ci		if (val > 1 || val < 0)
35008c2ecf20Sopenharmony_ci			err = -EINVAL;
35018c2ecf20Sopenharmony_ci		else
35028c2ecf20Sopenharmony_ci			tp->recvmsg_inq = val;
35038c2ecf20Sopenharmony_ci		break;
35048c2ecf20Sopenharmony_ci	case TCP_TX_DELAY:
35058c2ecf20Sopenharmony_ci		if (val)
35068c2ecf20Sopenharmony_ci			tcp_enable_tx_delay();
35078c2ecf20Sopenharmony_ci		WRITE_ONCE(tp->tcp_tx_delay, val);
35088c2ecf20Sopenharmony_ci		break;
35098c2ecf20Sopenharmony_ci#ifdef CONFIG_TCP_NATA_URC
35108c2ecf20Sopenharmony_ci	case TCP_NATA_URC:
35118c2ecf20Sopenharmony_ci		err = tcp_set_nata_urc(sk, optval, optlen);
35128c2ecf20Sopenharmony_ci		break;
35138c2ecf20Sopenharmony_ci#endif
35148c2ecf20Sopenharmony_ci#ifdef CONFIG_TCP_NATA_STL
35158c2ecf20Sopenharmony_ci	case TCP_NATA_STL:
35168c2ecf20Sopenharmony_ci		err = tcp_set_nata_stl(sk, optval, optlen);
35178c2ecf20Sopenharmony_ci		break;
35188c2ecf20Sopenharmony_ci#endif
35198c2ecf20Sopenharmony_ci	default:
35208c2ecf20Sopenharmony_ci		err = -ENOPROTOOPT;
35218c2ecf20Sopenharmony_ci		break;
35228c2ecf20Sopenharmony_ci	}
35238c2ecf20Sopenharmony_ci
35248c2ecf20Sopenharmony_ci	release_sock(sk);
35258c2ecf20Sopenharmony_ci	return err;
35268c2ecf20Sopenharmony_ci}
35278c2ecf20Sopenharmony_ci
35288c2ecf20Sopenharmony_ciint tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
35298c2ecf20Sopenharmony_ci		   unsigned int optlen)
35308c2ecf20Sopenharmony_ci{
35318c2ecf20Sopenharmony_ci	const struct inet_connection_sock *icsk = inet_csk(sk);
35328c2ecf20Sopenharmony_ci
35338c2ecf20Sopenharmony_ci	if (level != SOL_TCP)
35348c2ecf20Sopenharmony_ci		/* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */
35358c2ecf20Sopenharmony_ci		return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname,
35368c2ecf20Sopenharmony_ci								optval, optlen);
35378c2ecf20Sopenharmony_ci	return do_tcp_setsockopt(sk, level, optname, optval, optlen);
35388c2ecf20Sopenharmony_ci}
35398c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_setsockopt);
35408c2ecf20Sopenharmony_ci
35418c2ecf20Sopenharmony_cistatic void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
35428c2ecf20Sopenharmony_ci				      struct tcp_info *info)
35438c2ecf20Sopenharmony_ci{
35448c2ecf20Sopenharmony_ci	u64 stats[__TCP_CHRONO_MAX], total = 0;
35458c2ecf20Sopenharmony_ci	enum tcp_chrono i;
35468c2ecf20Sopenharmony_ci
35478c2ecf20Sopenharmony_ci	for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
35488c2ecf20Sopenharmony_ci		stats[i] = tp->chrono_stat[i - 1];
35498c2ecf20Sopenharmony_ci		if (i == tp->chrono_type)
35508c2ecf20Sopenharmony_ci			stats[i] += tcp_jiffies32 - tp->chrono_start;
35518c2ecf20Sopenharmony_ci		stats[i] *= USEC_PER_SEC / HZ;
35528c2ecf20Sopenharmony_ci		total += stats[i];
35538c2ecf20Sopenharmony_ci	}
35548c2ecf20Sopenharmony_ci
35558c2ecf20Sopenharmony_ci	info->tcpi_busy_time = total;
35568c2ecf20Sopenharmony_ci	info->tcpi_rwnd_limited = stats[TCP_CHRONO_RWND_LIMITED];
35578c2ecf20Sopenharmony_ci	info->tcpi_sndbuf_limited = stats[TCP_CHRONO_SNDBUF_LIMITED];
35588c2ecf20Sopenharmony_ci}
35598c2ecf20Sopenharmony_ci
35608c2ecf20Sopenharmony_ci/* Return information about state of tcp endpoint in API format. */
35618c2ecf20Sopenharmony_civoid tcp_get_info(struct sock *sk, struct tcp_info *info)
35628c2ecf20Sopenharmony_ci{
35638c2ecf20Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
35648c2ecf20Sopenharmony_ci	const struct inet_connection_sock *icsk = inet_csk(sk);
35658c2ecf20Sopenharmony_ci	unsigned long rate;
35668c2ecf20Sopenharmony_ci	u32 now;
35678c2ecf20Sopenharmony_ci	u64 rate64;
35688c2ecf20Sopenharmony_ci	bool slow;
35698c2ecf20Sopenharmony_ci
35708c2ecf20Sopenharmony_ci	memset(info, 0, sizeof(*info));
35718c2ecf20Sopenharmony_ci	if (sk->sk_type != SOCK_STREAM)
35728c2ecf20Sopenharmony_ci		return;
35738c2ecf20Sopenharmony_ci
35748c2ecf20Sopenharmony_ci	info->tcpi_state = inet_sk_state_load(sk);
35758c2ecf20Sopenharmony_ci
35768c2ecf20Sopenharmony_ci	/* Report meaningful fields for all TCP states, including listeners */
35778c2ecf20Sopenharmony_ci	rate = READ_ONCE(sk->sk_pacing_rate);
35788c2ecf20Sopenharmony_ci	rate64 = (rate != ~0UL) ? rate : ~0ULL;
35798c2ecf20Sopenharmony_ci	info->tcpi_pacing_rate = rate64;
35808c2ecf20Sopenharmony_ci
35818c2ecf20Sopenharmony_ci	rate = READ_ONCE(sk->sk_max_pacing_rate);
35828c2ecf20Sopenharmony_ci	rate64 = (rate != ~0UL) ? rate : ~0ULL;
35838c2ecf20Sopenharmony_ci	info->tcpi_max_pacing_rate = rate64;
35848c2ecf20Sopenharmony_ci
35858c2ecf20Sopenharmony_ci	info->tcpi_reordering = tp->reordering;
35868c2ecf20Sopenharmony_ci	info->tcpi_snd_cwnd = tp->snd_cwnd;
35878c2ecf20Sopenharmony_ci
35888c2ecf20Sopenharmony_ci	if (info->tcpi_state == TCP_LISTEN) {
35898c2ecf20Sopenharmony_ci		/* listeners aliased fields :
35908c2ecf20Sopenharmony_ci		 * tcpi_unacked -> Number of children ready for accept()
35918c2ecf20Sopenharmony_ci		 * tcpi_sacked  -> max backlog
35928c2ecf20Sopenharmony_ci		 */
35938c2ecf20Sopenharmony_ci		info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog);
35948c2ecf20Sopenharmony_ci		info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog);
35958c2ecf20Sopenharmony_ci		return;
35968c2ecf20Sopenharmony_ci	}
35978c2ecf20Sopenharmony_ci
35988c2ecf20Sopenharmony_ci	slow = lock_sock_fast(sk);
35998c2ecf20Sopenharmony_ci
36008c2ecf20Sopenharmony_ci	info->tcpi_ca_state = icsk->icsk_ca_state;
36018c2ecf20Sopenharmony_ci	info->tcpi_retransmits = icsk->icsk_retransmits;
36028c2ecf20Sopenharmony_ci	info->tcpi_probes = icsk->icsk_probes_out;
36038c2ecf20Sopenharmony_ci	info->tcpi_backoff = icsk->icsk_backoff;
36048c2ecf20Sopenharmony_ci
36058c2ecf20Sopenharmony_ci	if (tp->rx_opt.tstamp_ok)
36068c2ecf20Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
36078c2ecf20Sopenharmony_ci	if (tcp_is_sack(tp))
36088c2ecf20Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_SACK;
36098c2ecf20Sopenharmony_ci	if (tp->rx_opt.wscale_ok) {
36108c2ecf20Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_WSCALE;
36118c2ecf20Sopenharmony_ci		info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
36128c2ecf20Sopenharmony_ci		info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
36138c2ecf20Sopenharmony_ci	}
36148c2ecf20Sopenharmony_ci
36158c2ecf20Sopenharmony_ci	if (tp->ecn_flags & TCP_ECN_OK)
36168c2ecf20Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_ECN;
36178c2ecf20Sopenharmony_ci	if (tp->ecn_flags & TCP_ECN_SEEN)
36188c2ecf20Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_ECN_SEEN;
36198c2ecf20Sopenharmony_ci	if (tp->syn_data_acked)
36208c2ecf20Sopenharmony_ci		info->tcpi_options |= TCPI_OPT_SYN_DATA;
36218c2ecf20Sopenharmony_ci
36228c2ecf20Sopenharmony_ci	info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
36238c2ecf20Sopenharmony_ci	info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
36248c2ecf20Sopenharmony_ci	info->tcpi_snd_mss = tp->mss_cache;
36258c2ecf20Sopenharmony_ci	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
36268c2ecf20Sopenharmony_ci
36278c2ecf20Sopenharmony_ci	info->tcpi_unacked = tp->packets_out;
36288c2ecf20Sopenharmony_ci	info->tcpi_sacked = tp->sacked_out;
36298c2ecf20Sopenharmony_ci
36308c2ecf20Sopenharmony_ci	info->tcpi_lost = tp->lost_out;
36318c2ecf20Sopenharmony_ci	info->tcpi_retrans = tp->retrans_out;
36328c2ecf20Sopenharmony_ci
36338c2ecf20Sopenharmony_ci	now = tcp_jiffies32;
36348c2ecf20Sopenharmony_ci	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
36358c2ecf20Sopenharmony_ci	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
36368c2ecf20Sopenharmony_ci	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
36378c2ecf20Sopenharmony_ci
36388c2ecf20Sopenharmony_ci	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
36398c2ecf20Sopenharmony_ci	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
36408c2ecf20Sopenharmony_ci	info->tcpi_rtt = tp->srtt_us >> 3;
36418c2ecf20Sopenharmony_ci	info->tcpi_rttvar = tp->mdev_us >> 2;
36428c2ecf20Sopenharmony_ci	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
36438c2ecf20Sopenharmony_ci	info->tcpi_advmss = tp->advmss;
36448c2ecf20Sopenharmony_ci
36458c2ecf20Sopenharmony_ci	info->tcpi_rcv_rtt = tp->rcv_rtt_est.rtt_us >> 3;
36468c2ecf20Sopenharmony_ci	info->tcpi_rcv_space = tp->rcvq_space.space;
36478c2ecf20Sopenharmony_ci
36488c2ecf20Sopenharmony_ci	info->tcpi_total_retrans = tp->total_retrans;
36498c2ecf20Sopenharmony_ci
36508c2ecf20Sopenharmony_ci	info->tcpi_bytes_acked = tp->bytes_acked;
36518c2ecf20Sopenharmony_ci	info->tcpi_bytes_received = tp->bytes_received;
36528c2ecf20Sopenharmony_ci	info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt);
36538c2ecf20Sopenharmony_ci	tcp_get_info_chrono_stats(tp, info);
36548c2ecf20Sopenharmony_ci
36558c2ecf20Sopenharmony_ci	info->tcpi_segs_out = tp->segs_out;
36568c2ecf20Sopenharmony_ci	info->tcpi_segs_in = tp->segs_in;
36578c2ecf20Sopenharmony_ci
36588c2ecf20Sopenharmony_ci	info->tcpi_min_rtt = tcp_min_rtt(tp);
36598c2ecf20Sopenharmony_ci	info->tcpi_data_segs_in = tp->data_segs_in;
36608c2ecf20Sopenharmony_ci	info->tcpi_data_segs_out = tp->data_segs_out;
36618c2ecf20Sopenharmony_ci
36628c2ecf20Sopenharmony_ci	info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
36638c2ecf20Sopenharmony_ci	rate64 = tcp_compute_delivery_rate(tp);
36648c2ecf20Sopenharmony_ci	if (rate64)
36658c2ecf20Sopenharmony_ci		info->tcpi_delivery_rate = rate64;
36668c2ecf20Sopenharmony_ci	info->tcpi_delivered = tp->delivered;
36678c2ecf20Sopenharmony_ci	info->tcpi_delivered_ce = tp->delivered_ce;
36688c2ecf20Sopenharmony_ci	info->tcpi_bytes_sent = tp->bytes_sent;
36698c2ecf20Sopenharmony_ci	info->tcpi_bytes_retrans = tp->bytes_retrans;
36708c2ecf20Sopenharmony_ci	info->tcpi_dsack_dups = tp->dsack_dups;
36718c2ecf20Sopenharmony_ci	info->tcpi_reord_seen = tp->reord_seen;
36728c2ecf20Sopenharmony_ci	info->tcpi_rcv_ooopack = tp->rcv_ooopack;
36738c2ecf20Sopenharmony_ci	info->tcpi_snd_wnd = tp->snd_wnd;
36748c2ecf20Sopenharmony_ci	info->tcpi_fastopen_client_fail = tp->fastopen_client_fail;
36758c2ecf20Sopenharmony_ci	unlock_sock_fast(sk, slow);
36768c2ecf20Sopenharmony_ci}
36778c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_get_info);
36788c2ecf20Sopenharmony_ci
36798c2ecf20Sopenharmony_cistatic size_t tcp_opt_stats_get_size(void)
36808c2ecf20Sopenharmony_ci{
36818c2ecf20Sopenharmony_ci	return
36828c2ecf20Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BUSY */
36838c2ecf20Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_RWND_LIMITED */
36848c2ecf20Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_SNDBUF_LIMITED */
36858c2ecf20Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DATA_SEGS_OUT */
36868c2ecf20Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_TOTAL_RETRANS */
36878c2ecf20Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_PACING_RATE */
36888c2ecf20Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_DELIVERY_RATE */
36898c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_CWND */
36908c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORDERING */
36918c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_MIN_RTT */
36928c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u8)) + /* TCP_NLA_RECUR_RETRANS */
36938c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u8)) + /* TCP_NLA_DELIVERY_RATE_APP_LMT */
36948c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_SNDQ_SIZE */
36958c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u8)) + /* TCP_NLA_CA_STATE */
36968c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_SSTHRESH */
36978c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED */
36988c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
36998c2ecf20Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
37008c2ecf20Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
37018c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
37028c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
37038c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */
37048c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */
37058c2ecf20Sopenharmony_ci		nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */
37068c2ecf20Sopenharmony_ci		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */
37078c2ecf20Sopenharmony_ci		0;
37088c2ecf20Sopenharmony_ci}
37098c2ecf20Sopenharmony_ci
37108c2ecf20Sopenharmony_cistruct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
37118c2ecf20Sopenharmony_ci					       const struct sk_buff *orig_skb)
37128c2ecf20Sopenharmony_ci{
37138c2ecf20Sopenharmony_ci	const struct tcp_sock *tp = tcp_sk(sk);
37148c2ecf20Sopenharmony_ci	struct sk_buff *stats;
37158c2ecf20Sopenharmony_ci	struct tcp_info info;
37168c2ecf20Sopenharmony_ci	unsigned long rate;
37178c2ecf20Sopenharmony_ci	u64 rate64;
37188c2ecf20Sopenharmony_ci
37198c2ecf20Sopenharmony_ci	stats = alloc_skb(tcp_opt_stats_get_size(), GFP_ATOMIC);
37208c2ecf20Sopenharmony_ci	if (!stats)
37218c2ecf20Sopenharmony_ci		return NULL;
37228c2ecf20Sopenharmony_ci
37238c2ecf20Sopenharmony_ci	tcp_get_info_chrono_stats(tp, &info);
37248c2ecf20Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_BUSY,
37258c2ecf20Sopenharmony_ci			  info.tcpi_busy_time, TCP_NLA_PAD);
37268c2ecf20Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED,
37278c2ecf20Sopenharmony_ci			  info.tcpi_rwnd_limited, TCP_NLA_PAD);
37288c2ecf20Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
37298c2ecf20Sopenharmony_ci			  info.tcpi_sndbuf_limited, TCP_NLA_PAD);
37308c2ecf20Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_DATA_SEGS_OUT,
37318c2ecf20Sopenharmony_ci			  tp->data_segs_out, TCP_NLA_PAD);
37328c2ecf20Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS,
37338c2ecf20Sopenharmony_ci			  tp->total_retrans, TCP_NLA_PAD);
37348c2ecf20Sopenharmony_ci
37358c2ecf20Sopenharmony_ci	rate = READ_ONCE(sk->sk_pacing_rate);
37368c2ecf20Sopenharmony_ci	rate64 = (rate != ~0UL) ? rate : ~0ULL;
37378c2ecf20Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
37388c2ecf20Sopenharmony_ci
37398c2ecf20Sopenharmony_ci	rate64 = tcp_compute_delivery_rate(tp);
37408c2ecf20Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
37418c2ecf20Sopenharmony_ci
37428c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
37438c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
37448c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
37458c2ecf20Sopenharmony_ci
37468c2ecf20Sopenharmony_ci	nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
37478c2ecf20Sopenharmony_ci	nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
37488c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh);
37498c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered);
37508c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce);
37518c2ecf20Sopenharmony_ci
37528c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
37538c2ecf20Sopenharmony_ci	nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
37548c2ecf20Sopenharmony_ci
37558c2ecf20Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
37568c2ecf20Sopenharmony_ci			  TCP_NLA_PAD);
37578c2ecf20Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
37588c2ecf20Sopenharmony_ci			  TCP_NLA_PAD);
37598c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
37608c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
37618c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3);
37628c2ecf20Sopenharmony_ci	nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash);
37638c2ecf20Sopenharmony_ci	nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT,
37648c2ecf20Sopenharmony_ci		    max_t(int, 0, tp->write_seq - tp->snd_nxt));
37658c2ecf20Sopenharmony_ci	nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns,
37668c2ecf20Sopenharmony_ci			  TCP_NLA_PAD);
37678c2ecf20Sopenharmony_ci
37688c2ecf20Sopenharmony_ci	return stats;
37698c2ecf20Sopenharmony_ci}
37708c2ecf20Sopenharmony_ci
37718c2ecf20Sopenharmony_cistatic int do_tcp_getsockopt(struct sock *sk, int level,
37728c2ecf20Sopenharmony_ci		int optname, char __user *optval, int __user *optlen)
37738c2ecf20Sopenharmony_ci{
37748c2ecf20Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
37758c2ecf20Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
37768c2ecf20Sopenharmony_ci	struct net *net = sock_net(sk);
37778c2ecf20Sopenharmony_ci	int val, len;
37788c2ecf20Sopenharmony_ci
37798c2ecf20Sopenharmony_ci	if (get_user(len, optlen))
37808c2ecf20Sopenharmony_ci		return -EFAULT;
37818c2ecf20Sopenharmony_ci
37828c2ecf20Sopenharmony_ci	len = min_t(unsigned int, len, sizeof(int));
37838c2ecf20Sopenharmony_ci
37848c2ecf20Sopenharmony_ci	if (len < 0)
37858c2ecf20Sopenharmony_ci		return -EINVAL;
37868c2ecf20Sopenharmony_ci
37878c2ecf20Sopenharmony_ci	switch (optname) {
37888c2ecf20Sopenharmony_ci	case TCP_MAXSEG:
37898c2ecf20Sopenharmony_ci		val = tp->mss_cache;
37908c2ecf20Sopenharmony_ci		if (tp->rx_opt.user_mss &&
37918c2ecf20Sopenharmony_ci		    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
37928c2ecf20Sopenharmony_ci			val = tp->rx_opt.user_mss;
37938c2ecf20Sopenharmony_ci		if (tp->repair)
37948c2ecf20Sopenharmony_ci			val = tp->rx_opt.mss_clamp;
37958c2ecf20Sopenharmony_ci		break;
37968c2ecf20Sopenharmony_ci	case TCP_NODELAY:
37978c2ecf20Sopenharmony_ci		val = !!(tp->nonagle&TCP_NAGLE_OFF);
37988c2ecf20Sopenharmony_ci		break;
37998c2ecf20Sopenharmony_ci	case TCP_CORK:
38008c2ecf20Sopenharmony_ci		val = !!(tp->nonagle&TCP_NAGLE_CORK);
38018c2ecf20Sopenharmony_ci		break;
38028c2ecf20Sopenharmony_ci	case TCP_KEEPIDLE:
38038c2ecf20Sopenharmony_ci		val = keepalive_time_when(tp) / HZ;
38048c2ecf20Sopenharmony_ci		break;
38058c2ecf20Sopenharmony_ci	case TCP_KEEPINTVL:
38068c2ecf20Sopenharmony_ci		val = keepalive_intvl_when(tp) / HZ;
38078c2ecf20Sopenharmony_ci		break;
38088c2ecf20Sopenharmony_ci	case TCP_KEEPCNT:
38098c2ecf20Sopenharmony_ci		val = keepalive_probes(tp);
38108c2ecf20Sopenharmony_ci		break;
38118c2ecf20Sopenharmony_ci	case TCP_SYNCNT:
38128c2ecf20Sopenharmony_ci		val = READ_ONCE(icsk->icsk_syn_retries) ? :
38138c2ecf20Sopenharmony_ci			READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
38148c2ecf20Sopenharmony_ci		break;
38158c2ecf20Sopenharmony_ci	case TCP_LINGER2:
38168c2ecf20Sopenharmony_ci		val = READ_ONCE(tp->linger2);
38178c2ecf20Sopenharmony_ci		if (val >= 0)
38188c2ecf20Sopenharmony_ci			val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
38198c2ecf20Sopenharmony_ci		break;
38208c2ecf20Sopenharmony_ci	case TCP_DEFER_ACCEPT:
38218c2ecf20Sopenharmony_ci		val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
38228c2ecf20Sopenharmony_ci		val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
38238c2ecf20Sopenharmony_ci				      TCP_RTO_MAX / HZ);
38248c2ecf20Sopenharmony_ci		break;
38258c2ecf20Sopenharmony_ci	case TCP_WINDOW_CLAMP:
38268c2ecf20Sopenharmony_ci		val = tp->window_clamp;
38278c2ecf20Sopenharmony_ci		break;
38288c2ecf20Sopenharmony_ci	case TCP_INFO: {
38298c2ecf20Sopenharmony_ci		struct tcp_info info;
38308c2ecf20Sopenharmony_ci
38318c2ecf20Sopenharmony_ci		if (get_user(len, optlen))
38328c2ecf20Sopenharmony_ci			return -EFAULT;
38338c2ecf20Sopenharmony_ci
38348c2ecf20Sopenharmony_ci		tcp_get_info(sk, &info);
38358c2ecf20Sopenharmony_ci
38368c2ecf20Sopenharmony_ci		len = min_t(unsigned int, len, sizeof(info));
38378c2ecf20Sopenharmony_ci		if (put_user(len, optlen))
38388c2ecf20Sopenharmony_ci			return -EFAULT;
38398c2ecf20Sopenharmony_ci		if (copy_to_user(optval, &info, len))
38408c2ecf20Sopenharmony_ci			return -EFAULT;
38418c2ecf20Sopenharmony_ci		return 0;
38428c2ecf20Sopenharmony_ci	}
38438c2ecf20Sopenharmony_ci	case TCP_CC_INFO: {
38448c2ecf20Sopenharmony_ci		const struct tcp_congestion_ops *ca_ops;
38458c2ecf20Sopenharmony_ci		union tcp_cc_info info;
38468c2ecf20Sopenharmony_ci		size_t sz = 0;
38478c2ecf20Sopenharmony_ci		int attr;
38488c2ecf20Sopenharmony_ci
38498c2ecf20Sopenharmony_ci		if (get_user(len, optlen))
38508c2ecf20Sopenharmony_ci			return -EFAULT;
38518c2ecf20Sopenharmony_ci
38528c2ecf20Sopenharmony_ci		ca_ops = icsk->icsk_ca_ops;
38538c2ecf20Sopenharmony_ci		if (ca_ops && ca_ops->get_info)
38548c2ecf20Sopenharmony_ci			sz = ca_ops->get_info(sk, ~0U, &attr, &info);
38558c2ecf20Sopenharmony_ci
38568c2ecf20Sopenharmony_ci		len = min_t(unsigned int, len, sz);
38578c2ecf20Sopenharmony_ci		if (put_user(len, optlen))
38588c2ecf20Sopenharmony_ci			return -EFAULT;
38598c2ecf20Sopenharmony_ci		if (copy_to_user(optval, &info, len))
38608c2ecf20Sopenharmony_ci			return -EFAULT;
38618c2ecf20Sopenharmony_ci		return 0;
38628c2ecf20Sopenharmony_ci	}
38638c2ecf20Sopenharmony_ci	case TCP_QUICKACK:
38648c2ecf20Sopenharmony_ci		val = !inet_csk_in_pingpong_mode(sk);
38658c2ecf20Sopenharmony_ci		break;
38668c2ecf20Sopenharmony_ci
38678c2ecf20Sopenharmony_ci	case TCP_CONGESTION:
38688c2ecf20Sopenharmony_ci		if (get_user(len, optlen))
38698c2ecf20Sopenharmony_ci			return -EFAULT;
38708c2ecf20Sopenharmony_ci		len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
38718c2ecf20Sopenharmony_ci		if (put_user(len, optlen))
38728c2ecf20Sopenharmony_ci			return -EFAULT;
38738c2ecf20Sopenharmony_ci		if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
38748c2ecf20Sopenharmony_ci			return -EFAULT;
38758c2ecf20Sopenharmony_ci		return 0;
38768c2ecf20Sopenharmony_ci
38778c2ecf20Sopenharmony_ci	case TCP_ULP:
38788c2ecf20Sopenharmony_ci		if (get_user(len, optlen))
38798c2ecf20Sopenharmony_ci			return -EFAULT;
38808c2ecf20Sopenharmony_ci		len = min_t(unsigned int, len, TCP_ULP_NAME_MAX);
38818c2ecf20Sopenharmony_ci		if (!icsk->icsk_ulp_ops) {
38828c2ecf20Sopenharmony_ci			if (put_user(0, optlen))
38838c2ecf20Sopenharmony_ci				return -EFAULT;
38848c2ecf20Sopenharmony_ci			return 0;
38858c2ecf20Sopenharmony_ci		}
38868c2ecf20Sopenharmony_ci		if (put_user(len, optlen))
38878c2ecf20Sopenharmony_ci			return -EFAULT;
38888c2ecf20Sopenharmony_ci		if (copy_to_user(optval, icsk->icsk_ulp_ops->name, len))
38898c2ecf20Sopenharmony_ci			return -EFAULT;
38908c2ecf20Sopenharmony_ci		return 0;
38918c2ecf20Sopenharmony_ci
38928c2ecf20Sopenharmony_ci	case TCP_FASTOPEN_KEY: {
38938c2ecf20Sopenharmony_ci		u64 key[TCP_FASTOPEN_KEY_BUF_LENGTH / sizeof(u64)];
38948c2ecf20Sopenharmony_ci		unsigned int key_len;
38958c2ecf20Sopenharmony_ci
38968c2ecf20Sopenharmony_ci		if (get_user(len, optlen))
38978c2ecf20Sopenharmony_ci			return -EFAULT;
38988c2ecf20Sopenharmony_ci
38998c2ecf20Sopenharmony_ci		key_len = tcp_fastopen_get_cipher(net, icsk, key) *
39008c2ecf20Sopenharmony_ci				TCP_FASTOPEN_KEY_LENGTH;
39018c2ecf20Sopenharmony_ci		len = min_t(unsigned int, len, key_len);
39028c2ecf20Sopenharmony_ci		if (put_user(len, optlen))
39038c2ecf20Sopenharmony_ci			return -EFAULT;
39048c2ecf20Sopenharmony_ci		if (copy_to_user(optval, key, len))
39058c2ecf20Sopenharmony_ci			return -EFAULT;
39068c2ecf20Sopenharmony_ci		return 0;
39078c2ecf20Sopenharmony_ci	}
39088c2ecf20Sopenharmony_ci	case TCP_THIN_LINEAR_TIMEOUTS:
39098c2ecf20Sopenharmony_ci		val = tp->thin_lto;
39108c2ecf20Sopenharmony_ci		break;
39118c2ecf20Sopenharmony_ci
39128c2ecf20Sopenharmony_ci	case TCP_THIN_DUPACK:
39138c2ecf20Sopenharmony_ci		val = 0;
39148c2ecf20Sopenharmony_ci		break;
39158c2ecf20Sopenharmony_ci
39168c2ecf20Sopenharmony_ci	case TCP_REPAIR:
39178c2ecf20Sopenharmony_ci		val = tp->repair;
39188c2ecf20Sopenharmony_ci		break;
39198c2ecf20Sopenharmony_ci
39208c2ecf20Sopenharmony_ci	case TCP_REPAIR_QUEUE:
39218c2ecf20Sopenharmony_ci		if (tp->repair)
39228c2ecf20Sopenharmony_ci			val = tp->repair_queue;
39238c2ecf20Sopenharmony_ci		else
39248c2ecf20Sopenharmony_ci			return -EINVAL;
39258c2ecf20Sopenharmony_ci		break;
39268c2ecf20Sopenharmony_ci
39278c2ecf20Sopenharmony_ci	case TCP_REPAIR_WINDOW: {
39288c2ecf20Sopenharmony_ci		struct tcp_repair_window opt;
39298c2ecf20Sopenharmony_ci
39308c2ecf20Sopenharmony_ci		if (get_user(len, optlen))
39318c2ecf20Sopenharmony_ci			return -EFAULT;
39328c2ecf20Sopenharmony_ci
39338c2ecf20Sopenharmony_ci		if (len != sizeof(opt))
39348c2ecf20Sopenharmony_ci			return -EINVAL;
39358c2ecf20Sopenharmony_ci
39368c2ecf20Sopenharmony_ci		if (!tp->repair)
39378c2ecf20Sopenharmony_ci			return -EPERM;
39388c2ecf20Sopenharmony_ci
39398c2ecf20Sopenharmony_ci		opt.snd_wl1	= tp->snd_wl1;
39408c2ecf20Sopenharmony_ci		opt.snd_wnd	= tp->snd_wnd;
39418c2ecf20Sopenharmony_ci		opt.max_window	= tp->max_window;
39428c2ecf20Sopenharmony_ci		opt.rcv_wnd	= tp->rcv_wnd;
39438c2ecf20Sopenharmony_ci		opt.rcv_wup	= tp->rcv_wup;
39448c2ecf20Sopenharmony_ci
39458c2ecf20Sopenharmony_ci		if (copy_to_user(optval, &opt, len))
39468c2ecf20Sopenharmony_ci			return -EFAULT;
39478c2ecf20Sopenharmony_ci		return 0;
39488c2ecf20Sopenharmony_ci	}
39498c2ecf20Sopenharmony_ci	case TCP_QUEUE_SEQ:
39508c2ecf20Sopenharmony_ci		if (tp->repair_queue == TCP_SEND_QUEUE)
39518c2ecf20Sopenharmony_ci			val = tp->write_seq;
39528c2ecf20Sopenharmony_ci		else if (tp->repair_queue == TCP_RECV_QUEUE)
39538c2ecf20Sopenharmony_ci			val = tp->rcv_nxt;
39548c2ecf20Sopenharmony_ci		else
39558c2ecf20Sopenharmony_ci			return -EINVAL;
39568c2ecf20Sopenharmony_ci		break;
39578c2ecf20Sopenharmony_ci
39588c2ecf20Sopenharmony_ci	case TCP_USER_TIMEOUT:
39598c2ecf20Sopenharmony_ci		val = READ_ONCE(icsk->icsk_user_timeout);
39608c2ecf20Sopenharmony_ci		break;
39618c2ecf20Sopenharmony_ci
39628c2ecf20Sopenharmony_ci	case TCP_FASTOPEN:
39638c2ecf20Sopenharmony_ci		val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
39648c2ecf20Sopenharmony_ci		break;
39658c2ecf20Sopenharmony_ci
39668c2ecf20Sopenharmony_ci	case TCP_FASTOPEN_CONNECT:
39678c2ecf20Sopenharmony_ci		val = tp->fastopen_connect;
39688c2ecf20Sopenharmony_ci		break;
39698c2ecf20Sopenharmony_ci
39708c2ecf20Sopenharmony_ci	case TCP_FASTOPEN_NO_COOKIE:
39718c2ecf20Sopenharmony_ci		val = tp->fastopen_no_cookie;
39728c2ecf20Sopenharmony_ci		break;
39738c2ecf20Sopenharmony_ci
39748c2ecf20Sopenharmony_ci	case TCP_TX_DELAY:
39758c2ecf20Sopenharmony_ci		val = READ_ONCE(tp->tcp_tx_delay);
39768c2ecf20Sopenharmony_ci		break;
39778c2ecf20Sopenharmony_ci
39788c2ecf20Sopenharmony_ci	case TCP_TIMESTAMP:
39798c2ecf20Sopenharmony_ci		val = tcp_time_stamp_raw() + tp->tsoffset;
39808c2ecf20Sopenharmony_ci		break;
39818c2ecf20Sopenharmony_ci	case TCP_NOTSENT_LOWAT:
39828c2ecf20Sopenharmony_ci		val = READ_ONCE(tp->notsent_lowat);
39838c2ecf20Sopenharmony_ci		break;
39848c2ecf20Sopenharmony_ci	case TCP_INQ:
39858c2ecf20Sopenharmony_ci		val = tp->recvmsg_inq;
39868c2ecf20Sopenharmony_ci		break;
39878c2ecf20Sopenharmony_ci	case TCP_SAVE_SYN:
39888c2ecf20Sopenharmony_ci		val = tp->save_syn;
39898c2ecf20Sopenharmony_ci		break;
39908c2ecf20Sopenharmony_ci	case TCP_SAVED_SYN: {
39918c2ecf20Sopenharmony_ci		if (get_user(len, optlen))
39928c2ecf20Sopenharmony_ci			return -EFAULT;
39938c2ecf20Sopenharmony_ci
39948c2ecf20Sopenharmony_ci		lock_sock(sk);
39958c2ecf20Sopenharmony_ci		if (tp->saved_syn) {
39968c2ecf20Sopenharmony_ci			if (len < tcp_saved_syn_len(tp->saved_syn)) {
39978c2ecf20Sopenharmony_ci				if (put_user(tcp_saved_syn_len(tp->saved_syn),
39988c2ecf20Sopenharmony_ci					     optlen)) {
39998c2ecf20Sopenharmony_ci					release_sock(sk);
40008c2ecf20Sopenharmony_ci					return -EFAULT;
40018c2ecf20Sopenharmony_ci				}
40028c2ecf20Sopenharmony_ci				release_sock(sk);
40038c2ecf20Sopenharmony_ci				return -EINVAL;
40048c2ecf20Sopenharmony_ci			}
40058c2ecf20Sopenharmony_ci			len = tcp_saved_syn_len(tp->saved_syn);
40068c2ecf20Sopenharmony_ci			if (put_user(len, optlen)) {
40078c2ecf20Sopenharmony_ci				release_sock(sk);
40088c2ecf20Sopenharmony_ci				return -EFAULT;
40098c2ecf20Sopenharmony_ci			}
40108c2ecf20Sopenharmony_ci			if (copy_to_user(optval, tp->saved_syn->data, len)) {
40118c2ecf20Sopenharmony_ci				release_sock(sk);
40128c2ecf20Sopenharmony_ci				return -EFAULT;
40138c2ecf20Sopenharmony_ci			}
40148c2ecf20Sopenharmony_ci			tcp_saved_syn_free(tp);
40158c2ecf20Sopenharmony_ci			release_sock(sk);
40168c2ecf20Sopenharmony_ci		} else {
40178c2ecf20Sopenharmony_ci			release_sock(sk);
40188c2ecf20Sopenharmony_ci			len = 0;
40198c2ecf20Sopenharmony_ci			if (put_user(len, optlen))
40208c2ecf20Sopenharmony_ci				return -EFAULT;
40218c2ecf20Sopenharmony_ci		}
40228c2ecf20Sopenharmony_ci		return 0;
40238c2ecf20Sopenharmony_ci	}
40248c2ecf20Sopenharmony_ci#ifdef CONFIG_MMU
40258c2ecf20Sopenharmony_ci	case TCP_ZEROCOPY_RECEIVE: {
40268c2ecf20Sopenharmony_ci		struct tcp_zerocopy_receive zc = {};
40278c2ecf20Sopenharmony_ci		int err;
40288c2ecf20Sopenharmony_ci
40298c2ecf20Sopenharmony_ci		if (get_user(len, optlen))
40308c2ecf20Sopenharmony_ci			return -EFAULT;
40318c2ecf20Sopenharmony_ci		if (len < 0 ||
40328c2ecf20Sopenharmony_ci		    len < offsetofend(struct tcp_zerocopy_receive, length))
40338c2ecf20Sopenharmony_ci			return -EINVAL;
40348c2ecf20Sopenharmony_ci		if (len > sizeof(zc)) {
40358c2ecf20Sopenharmony_ci			len = sizeof(zc);
40368c2ecf20Sopenharmony_ci			if (put_user(len, optlen))
40378c2ecf20Sopenharmony_ci				return -EFAULT;
40388c2ecf20Sopenharmony_ci		}
40398c2ecf20Sopenharmony_ci		if (copy_from_user(&zc, optval, len))
40408c2ecf20Sopenharmony_ci			return -EFAULT;
40418c2ecf20Sopenharmony_ci		lock_sock(sk);
40428c2ecf20Sopenharmony_ci		err = tcp_zerocopy_receive(sk, &zc);
40438c2ecf20Sopenharmony_ci		err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
40448c2ecf20Sopenharmony_ci							  &zc, &len, err);
40458c2ecf20Sopenharmony_ci		release_sock(sk);
40468c2ecf20Sopenharmony_ci		if (len >= offsetofend(struct tcp_zerocopy_receive, err))
40478c2ecf20Sopenharmony_ci			goto zerocopy_rcv_sk_err;
40488c2ecf20Sopenharmony_ci		switch (len) {
40498c2ecf20Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, err):
40508c2ecf20Sopenharmony_ci			goto zerocopy_rcv_sk_err;
40518c2ecf20Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, inq):
40528c2ecf20Sopenharmony_ci			goto zerocopy_rcv_inq;
40538c2ecf20Sopenharmony_ci		case offsetofend(struct tcp_zerocopy_receive, length):
40548c2ecf20Sopenharmony_ci		default:
40558c2ecf20Sopenharmony_ci			goto zerocopy_rcv_out;
40568c2ecf20Sopenharmony_ci		}
40578c2ecf20Sopenharmony_cizerocopy_rcv_sk_err:
40588c2ecf20Sopenharmony_ci		if (!err)
40598c2ecf20Sopenharmony_ci			zc.err = sock_error(sk);
40608c2ecf20Sopenharmony_cizerocopy_rcv_inq:
40618c2ecf20Sopenharmony_ci		zc.inq = tcp_inq_hint(sk);
40628c2ecf20Sopenharmony_cizerocopy_rcv_out:
40638c2ecf20Sopenharmony_ci		if (!err && copy_to_user(optval, &zc, len))
40648c2ecf20Sopenharmony_ci			err = -EFAULT;
40658c2ecf20Sopenharmony_ci		return err;
40668c2ecf20Sopenharmony_ci	}
40678c2ecf20Sopenharmony_ci#endif
40688c2ecf20Sopenharmony_ci	default:
40698c2ecf20Sopenharmony_ci		return -ENOPROTOOPT;
40708c2ecf20Sopenharmony_ci	}
40718c2ecf20Sopenharmony_ci
40728c2ecf20Sopenharmony_ci	if (put_user(len, optlen))
40738c2ecf20Sopenharmony_ci		return -EFAULT;
40748c2ecf20Sopenharmony_ci	if (copy_to_user(optval, &val, len))
40758c2ecf20Sopenharmony_ci		return -EFAULT;
40768c2ecf20Sopenharmony_ci	return 0;
40778c2ecf20Sopenharmony_ci}
40788c2ecf20Sopenharmony_ci
40798c2ecf20Sopenharmony_cibool tcp_bpf_bypass_getsockopt(int level, int optname)
40808c2ecf20Sopenharmony_ci{
40818c2ecf20Sopenharmony_ci	/* TCP do_tcp_getsockopt has optimized getsockopt implementation
40828c2ecf20Sopenharmony_ci	 * to avoid extra socket lock for TCP_ZEROCOPY_RECEIVE.
40838c2ecf20Sopenharmony_ci	 */
40848c2ecf20Sopenharmony_ci	if (level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE)
40858c2ecf20Sopenharmony_ci		return true;
40868c2ecf20Sopenharmony_ci
40878c2ecf20Sopenharmony_ci	return false;
40888c2ecf20Sopenharmony_ci}
40898c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_bpf_bypass_getsockopt);
40908c2ecf20Sopenharmony_ci
40918c2ecf20Sopenharmony_ciint tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
40928c2ecf20Sopenharmony_ci		   int __user *optlen)
40938c2ecf20Sopenharmony_ci{
40948c2ecf20Sopenharmony_ci	struct inet_connection_sock *icsk = inet_csk(sk);
40958c2ecf20Sopenharmony_ci
40968c2ecf20Sopenharmony_ci	if (level != SOL_TCP)
40978c2ecf20Sopenharmony_ci		/* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */
40988c2ecf20Sopenharmony_ci		return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname,
40998c2ecf20Sopenharmony_ci								optval, optlen);
41008c2ecf20Sopenharmony_ci	return do_tcp_getsockopt(sk, level, optname, optval, optlen);
41018c2ecf20Sopenharmony_ci}
41028c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_getsockopt);
41038c2ecf20Sopenharmony_ci
41048c2ecf20Sopenharmony_ci#ifdef CONFIG_TCP_MD5SIG
41058c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
41068c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(tcp_md5sig_mutex);
41078c2ecf20Sopenharmony_cistatic bool tcp_md5sig_pool_populated = false;
41088c2ecf20Sopenharmony_ci
41098c2ecf20Sopenharmony_cistatic void __tcp_alloc_md5sig_pool(void)
41108c2ecf20Sopenharmony_ci{
41118c2ecf20Sopenharmony_ci	struct crypto_ahash *hash;
41128c2ecf20Sopenharmony_ci	int cpu;
41138c2ecf20Sopenharmony_ci
41148c2ecf20Sopenharmony_ci	hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
41158c2ecf20Sopenharmony_ci	if (IS_ERR(hash))
41168c2ecf20Sopenharmony_ci		return;
41178c2ecf20Sopenharmony_ci
41188c2ecf20Sopenharmony_ci	for_each_possible_cpu(cpu) {
41198c2ecf20Sopenharmony_ci		void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch;
41208c2ecf20Sopenharmony_ci		struct ahash_request *req;
41218c2ecf20Sopenharmony_ci
41228c2ecf20Sopenharmony_ci		if (!scratch) {
41238c2ecf20Sopenharmony_ci			scratch = kmalloc_node(sizeof(union tcp_md5sum_block) +
41248c2ecf20Sopenharmony_ci					       sizeof(struct tcphdr),
41258c2ecf20Sopenharmony_ci					       GFP_KERNEL,
41268c2ecf20Sopenharmony_ci					       cpu_to_node(cpu));
41278c2ecf20Sopenharmony_ci			if (!scratch)
41288c2ecf20Sopenharmony_ci				return;
41298c2ecf20Sopenharmony_ci			per_cpu(tcp_md5sig_pool, cpu).scratch = scratch;
41308c2ecf20Sopenharmony_ci		}
41318c2ecf20Sopenharmony_ci		if (per_cpu(tcp_md5sig_pool, cpu).md5_req)
41328c2ecf20Sopenharmony_ci			continue;
41338c2ecf20Sopenharmony_ci
41348c2ecf20Sopenharmony_ci		req = ahash_request_alloc(hash, GFP_KERNEL);
41358c2ecf20Sopenharmony_ci		if (!req)
41368c2ecf20Sopenharmony_ci			return;
41378c2ecf20Sopenharmony_ci
41388c2ecf20Sopenharmony_ci		ahash_request_set_callback(req, 0, NULL, NULL);
41398c2ecf20Sopenharmony_ci
41408c2ecf20Sopenharmony_ci		per_cpu(tcp_md5sig_pool, cpu).md5_req = req;
41418c2ecf20Sopenharmony_ci	}
41428c2ecf20Sopenharmony_ci	/* before setting tcp_md5sig_pool_populated, we must commit all writes
41438c2ecf20Sopenharmony_ci	 * to memory. See smp_rmb() in tcp_get_md5sig_pool()
41448c2ecf20Sopenharmony_ci	 */
41458c2ecf20Sopenharmony_ci	smp_wmb();
41468c2ecf20Sopenharmony_ci	/* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
41478c2ecf20Sopenharmony_ci	 * and tcp_get_md5sig_pool().
41488c2ecf20Sopenharmony_ci	*/
41498c2ecf20Sopenharmony_ci	WRITE_ONCE(tcp_md5sig_pool_populated, true);
41508c2ecf20Sopenharmony_ci}
41518c2ecf20Sopenharmony_ci
41528c2ecf20Sopenharmony_cibool tcp_alloc_md5sig_pool(void)
41538c2ecf20Sopenharmony_ci{
41548c2ecf20Sopenharmony_ci	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
41558c2ecf20Sopenharmony_ci	if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
41568c2ecf20Sopenharmony_ci		mutex_lock(&tcp_md5sig_mutex);
41578c2ecf20Sopenharmony_ci
41588c2ecf20Sopenharmony_ci		if (!tcp_md5sig_pool_populated) {
41598c2ecf20Sopenharmony_ci			__tcp_alloc_md5sig_pool();
41608c2ecf20Sopenharmony_ci			if (tcp_md5sig_pool_populated)
41618c2ecf20Sopenharmony_ci				static_branch_inc(&tcp_md5_needed);
41628c2ecf20Sopenharmony_ci		}
41638c2ecf20Sopenharmony_ci
41648c2ecf20Sopenharmony_ci		mutex_unlock(&tcp_md5sig_mutex);
41658c2ecf20Sopenharmony_ci	}
41668c2ecf20Sopenharmony_ci	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
41678c2ecf20Sopenharmony_ci	return READ_ONCE(tcp_md5sig_pool_populated);
41688c2ecf20Sopenharmony_ci}
41698c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_alloc_md5sig_pool);
41708c2ecf20Sopenharmony_ci
41718c2ecf20Sopenharmony_ci
41728c2ecf20Sopenharmony_ci/**
41738c2ecf20Sopenharmony_ci *	tcp_get_md5sig_pool - get md5sig_pool for this user
41748c2ecf20Sopenharmony_ci *
41758c2ecf20Sopenharmony_ci *	We use percpu structure, so if we succeed, we exit with preemption
41768c2ecf20Sopenharmony_ci *	and BH disabled, to make sure another thread or softirq handling
41778c2ecf20Sopenharmony_ci *	wont try to get same context.
41788c2ecf20Sopenharmony_ci */
41798c2ecf20Sopenharmony_cistruct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
41808c2ecf20Sopenharmony_ci{
41818c2ecf20Sopenharmony_ci	local_bh_disable();
41828c2ecf20Sopenharmony_ci
41838c2ecf20Sopenharmony_ci	/* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
41848c2ecf20Sopenharmony_ci	if (READ_ONCE(tcp_md5sig_pool_populated)) {
41858c2ecf20Sopenharmony_ci		/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
41868c2ecf20Sopenharmony_ci		smp_rmb();
41878c2ecf20Sopenharmony_ci		return this_cpu_ptr(&tcp_md5sig_pool);
41888c2ecf20Sopenharmony_ci	}
41898c2ecf20Sopenharmony_ci	local_bh_enable();
41908c2ecf20Sopenharmony_ci	return NULL;
41918c2ecf20Sopenharmony_ci}
41928c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_get_md5sig_pool);
41938c2ecf20Sopenharmony_ci
41948c2ecf20Sopenharmony_ciint tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
41958c2ecf20Sopenharmony_ci			  const struct sk_buff *skb, unsigned int header_len)
41968c2ecf20Sopenharmony_ci{
41978c2ecf20Sopenharmony_ci	struct scatterlist sg;
41988c2ecf20Sopenharmony_ci	const struct tcphdr *tp = tcp_hdr(skb);
41998c2ecf20Sopenharmony_ci	struct ahash_request *req = hp->md5_req;
42008c2ecf20Sopenharmony_ci	unsigned int i;
42018c2ecf20Sopenharmony_ci	const unsigned int head_data_len = skb_headlen(skb) > header_len ?
42028c2ecf20Sopenharmony_ci					   skb_headlen(skb) - header_len : 0;
42038c2ecf20Sopenharmony_ci	const struct skb_shared_info *shi = skb_shinfo(skb);
42048c2ecf20Sopenharmony_ci	struct sk_buff *frag_iter;
42058c2ecf20Sopenharmony_ci
42068c2ecf20Sopenharmony_ci	sg_init_table(&sg, 1);
42078c2ecf20Sopenharmony_ci
42088c2ecf20Sopenharmony_ci	sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
42098c2ecf20Sopenharmony_ci	ahash_request_set_crypt(req, &sg, NULL, head_data_len);
42108c2ecf20Sopenharmony_ci	if (crypto_ahash_update(req))
42118c2ecf20Sopenharmony_ci		return 1;
42128c2ecf20Sopenharmony_ci
42138c2ecf20Sopenharmony_ci	for (i = 0; i < shi->nr_frags; ++i) {
42148c2ecf20Sopenharmony_ci		const skb_frag_t *f = &shi->frags[i];
42158c2ecf20Sopenharmony_ci		unsigned int offset = skb_frag_off(f);
42168c2ecf20Sopenharmony_ci		struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
42178c2ecf20Sopenharmony_ci
42188c2ecf20Sopenharmony_ci		sg_set_page(&sg, page, skb_frag_size(f),
42198c2ecf20Sopenharmony_ci			    offset_in_page(offset));
42208c2ecf20Sopenharmony_ci		ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f));
42218c2ecf20Sopenharmony_ci		if (crypto_ahash_update(req))
42228c2ecf20Sopenharmony_ci			return 1;
42238c2ecf20Sopenharmony_ci	}
42248c2ecf20Sopenharmony_ci
42258c2ecf20Sopenharmony_ci	skb_walk_frags(skb, frag_iter)
42268c2ecf20Sopenharmony_ci		if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
42278c2ecf20Sopenharmony_ci			return 1;
42288c2ecf20Sopenharmony_ci
42298c2ecf20Sopenharmony_ci	return 0;
42308c2ecf20Sopenharmony_ci}
42318c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_hash_skb_data);
42328c2ecf20Sopenharmony_ci
42338c2ecf20Sopenharmony_ciint tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
42348c2ecf20Sopenharmony_ci{
42358c2ecf20Sopenharmony_ci	u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */
42368c2ecf20Sopenharmony_ci	struct scatterlist sg;
42378c2ecf20Sopenharmony_ci
42388c2ecf20Sopenharmony_ci	sg_init_one(&sg, key->key, keylen);
42398c2ecf20Sopenharmony_ci	ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen);
42408c2ecf20Sopenharmony_ci
42418c2ecf20Sopenharmony_ci	/* We use data_race() because tcp_md5_do_add() might change key->key under us */
42428c2ecf20Sopenharmony_ci	return data_race(crypto_ahash_update(hp->md5_req));
42438c2ecf20Sopenharmony_ci}
42448c2ecf20Sopenharmony_ciEXPORT_SYMBOL(tcp_md5_hash_key);
42458c2ecf20Sopenharmony_ci
42468c2ecf20Sopenharmony_ci#endif
42478c2ecf20Sopenharmony_ci
42488c2ecf20Sopenharmony_civoid tcp_done(struct sock *sk)
42498c2ecf20Sopenharmony_ci{
42508c2ecf20Sopenharmony_ci	struct request_sock *req;
42518c2ecf20Sopenharmony_ci
42528c2ecf20Sopenharmony_ci	/* We might be called with a new socket, after
42538c2ecf20Sopenharmony_ci	 * inet_csk_prepare_forced_close() has been called
42548c2ecf20Sopenharmony_ci	 * so we can not use lockdep_sock_is_held(sk)
42558c2ecf20Sopenharmony_ci	 */
42568c2ecf20Sopenharmony_ci	req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);
42578c2ecf20Sopenharmony_ci
42588c2ecf20Sopenharmony_ci	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
42598c2ecf20Sopenharmony_ci		TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
42608c2ecf20Sopenharmony_ci
42618c2ecf20Sopenharmony_ci	tcp_set_state(sk, TCP_CLOSE);
42628c2ecf20Sopenharmony_ci	tcp_clear_xmit_timers(sk);
42638c2ecf20Sopenharmony_ci	if (req)
42648c2ecf20Sopenharmony_ci		reqsk_fastopen_remove(sk, req, false);
42658c2ecf20Sopenharmony_ci
42668c2ecf20Sopenharmony_ci	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
42678c2ecf20Sopenharmony_ci
42688c2ecf20Sopenharmony_ci	if (!sock_flag(sk, SOCK_DEAD))
42698c2ecf20Sopenharmony_ci		sk->sk_state_change(sk);
42708c2ecf20Sopenharmony_ci	else
42718c2ecf20Sopenharmony_ci		inet_csk_destroy_sock(sk);
42728c2ecf20Sopenharmony_ci}
42738c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_done);
42748c2ecf20Sopenharmony_ci
42758c2ecf20Sopenharmony_ciint tcp_abort(struct sock *sk, int err)
42768c2ecf20Sopenharmony_ci{
42778c2ecf20Sopenharmony_ci	if (!sk_fullsock(sk)) {
42788c2ecf20Sopenharmony_ci		if (sk->sk_state == TCP_NEW_SYN_RECV) {
42798c2ecf20Sopenharmony_ci			struct request_sock *req = inet_reqsk(sk);
42808c2ecf20Sopenharmony_ci
42818c2ecf20Sopenharmony_ci			local_bh_disable();
42828c2ecf20Sopenharmony_ci			inet_csk_reqsk_queue_drop(req->rsk_listener, req);
42838c2ecf20Sopenharmony_ci			local_bh_enable();
42848c2ecf20Sopenharmony_ci			return 0;
42858c2ecf20Sopenharmony_ci		}
42868c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
42878c2ecf20Sopenharmony_ci	}
42888c2ecf20Sopenharmony_ci
42898c2ecf20Sopenharmony_ci	/* Don't race with userspace socket closes such as tcp_close. */
42908c2ecf20Sopenharmony_ci#ifdef CONFIG_TCP_SOCK_DESTROY
42918c2ecf20Sopenharmony_ci	/* BPF context ensures sock locking. */
42928c2ecf20Sopenharmony_ci	if (!has_current_bpf_ctx())
42938c2ecf20Sopenharmony_ci#endif  /* CONFIG_TCP_SOCK_DESTROY */
42948c2ecf20Sopenharmony_ci		lock_sock(sk);
42958c2ecf20Sopenharmony_ci
42968c2ecf20Sopenharmony_ci	if (sk->sk_state == TCP_LISTEN) {
42978c2ecf20Sopenharmony_ci		tcp_set_state(sk, TCP_CLOSE);
42988c2ecf20Sopenharmony_ci		inet_csk_listen_stop(sk);
42998c2ecf20Sopenharmony_ci	}
43008c2ecf20Sopenharmony_ci
43018c2ecf20Sopenharmony_ci	/* Don't race with BH socket closes such as inet_csk_listen_stop. */
43028c2ecf20Sopenharmony_ci	local_bh_disable();
43038c2ecf20Sopenharmony_ci	bh_lock_sock(sk);
43048c2ecf20Sopenharmony_ci
43058c2ecf20Sopenharmony_ci	if (!sock_flag(sk, SOCK_DEAD)) {
43068c2ecf20Sopenharmony_ci		sk->sk_err = err;
43078c2ecf20Sopenharmony_ci		/* This barrier is coupled with smp_rmb() in tcp_poll() */
43088c2ecf20Sopenharmony_ci		smp_wmb();
43098c2ecf20Sopenharmony_ci		sk->sk_error_report(sk);
43108c2ecf20Sopenharmony_ci		if (tcp_need_reset(sk->sk_state))
43118c2ecf20Sopenharmony_ci			tcp_send_active_reset(sk, GFP_ATOMIC);
43128c2ecf20Sopenharmony_ci		tcp_done(sk);
43138c2ecf20Sopenharmony_ci	}
43148c2ecf20Sopenharmony_ci
43158c2ecf20Sopenharmony_ci	bh_unlock_sock(sk);
43168c2ecf20Sopenharmony_ci	local_bh_enable();
43178c2ecf20Sopenharmony_ci	tcp_write_queue_purge(sk);
43188c2ecf20Sopenharmony_ci#ifdef CONFIG_TCP_SOCK_DESTROY
43198c2ecf20Sopenharmony_ci	if (!has_current_bpf_ctx())
43208c2ecf20Sopenharmony_ci#endif  /* CONFIG_TCP_SOCK_DESTROY */
43218c2ecf20Sopenharmony_ci		release_sock(sk);
43228c2ecf20Sopenharmony_ci	return 0;
43238c2ecf20Sopenharmony_ci}
43248c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_abort);
43258c2ecf20Sopenharmony_ci
43268c2ecf20Sopenharmony_ciextern struct tcp_congestion_ops tcp_reno;
43278c2ecf20Sopenharmony_ci
43288c2ecf20Sopenharmony_cistatic __initdata unsigned long thash_entries;
43298c2ecf20Sopenharmony_cistatic int __init set_thash_entries(char *str)
43308c2ecf20Sopenharmony_ci{
43318c2ecf20Sopenharmony_ci	ssize_t ret;
43328c2ecf20Sopenharmony_ci
43338c2ecf20Sopenharmony_ci	if (!str)
43348c2ecf20Sopenharmony_ci		return 0;
43358c2ecf20Sopenharmony_ci
43368c2ecf20Sopenharmony_ci	ret = kstrtoul(str, 0, &thash_entries);
43378c2ecf20Sopenharmony_ci	if (ret)
43388c2ecf20Sopenharmony_ci		return 0;
43398c2ecf20Sopenharmony_ci
43408c2ecf20Sopenharmony_ci	return 1;
43418c2ecf20Sopenharmony_ci}
43428c2ecf20Sopenharmony_ci__setup("thash_entries=", set_thash_entries);
43438c2ecf20Sopenharmony_ci
43448c2ecf20Sopenharmony_cistatic void __init tcp_init_mem(void)
43458c2ecf20Sopenharmony_ci{
43468c2ecf20Sopenharmony_ci	unsigned long limit = nr_free_buffer_pages() / 16;
43478c2ecf20Sopenharmony_ci
43488c2ecf20Sopenharmony_ci	limit = max(limit, 128UL);
43498c2ecf20Sopenharmony_ci	sysctl_tcp_mem[0] = limit / 4 * 3;		/* 4.68 % */
43508c2ecf20Sopenharmony_ci	sysctl_tcp_mem[1] = limit;			/* 6.25 % */
43518c2ecf20Sopenharmony_ci	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;	/* 9.37 % */
43528c2ecf20Sopenharmony_ci}
43538c2ecf20Sopenharmony_ci
43548c2ecf20Sopenharmony_civoid __init tcp_init(void)
43558c2ecf20Sopenharmony_ci{
43568c2ecf20Sopenharmony_ci	int max_rshare, max_wshare, cnt;
43578c2ecf20Sopenharmony_ci	unsigned long limit;
43588c2ecf20Sopenharmony_ci	unsigned int i;
43598c2ecf20Sopenharmony_ci
43608c2ecf20Sopenharmony_ci	BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
43618c2ecf20Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
43628c2ecf20Sopenharmony_ci		     sizeof_field(struct sk_buff, cb));
43638c2ecf20Sopenharmony_ci
43648c2ecf20Sopenharmony_ci	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
43658c2ecf20Sopenharmony_ci
43668c2ecf20Sopenharmony_ci	timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
43678c2ecf20Sopenharmony_ci	mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
43688c2ecf20Sopenharmony_ci
43698c2ecf20Sopenharmony_ci	inet_hashinfo_init(&tcp_hashinfo);
43708c2ecf20Sopenharmony_ci	inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
43718c2ecf20Sopenharmony_ci			    thash_entries, 21,  /* one slot per 2 MB*/
43728c2ecf20Sopenharmony_ci			    0, 64 * 1024);
43738c2ecf20Sopenharmony_ci	tcp_hashinfo.bind_bucket_cachep =
43748c2ecf20Sopenharmony_ci		kmem_cache_create("tcp_bind_bucket",
43758c2ecf20Sopenharmony_ci				  sizeof(struct inet_bind_bucket), 0,
43768c2ecf20Sopenharmony_ci				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
43778c2ecf20Sopenharmony_ci
43788c2ecf20Sopenharmony_ci	/* Size and allocate the main established and bind bucket
43798c2ecf20Sopenharmony_ci	 * hash tables.
43808c2ecf20Sopenharmony_ci	 *
43818c2ecf20Sopenharmony_ci	 * The methodology is similar to that of the buffer cache.
43828c2ecf20Sopenharmony_ci	 */
43838c2ecf20Sopenharmony_ci	tcp_hashinfo.ehash =
43848c2ecf20Sopenharmony_ci		alloc_large_system_hash("TCP established",
43858c2ecf20Sopenharmony_ci					sizeof(struct inet_ehash_bucket),
43868c2ecf20Sopenharmony_ci					thash_entries,
43878c2ecf20Sopenharmony_ci					17, /* one slot per 128 KB of memory */
43888c2ecf20Sopenharmony_ci					0,
43898c2ecf20Sopenharmony_ci					NULL,
43908c2ecf20Sopenharmony_ci					&tcp_hashinfo.ehash_mask,
43918c2ecf20Sopenharmony_ci					0,
43928c2ecf20Sopenharmony_ci					thash_entries ? 0 : 512 * 1024);
43938c2ecf20Sopenharmony_ci	for (i = 0; i <= tcp_hashinfo.ehash_mask; i++)
43948c2ecf20Sopenharmony_ci		INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
43958c2ecf20Sopenharmony_ci
43968c2ecf20Sopenharmony_ci	if (inet_ehash_locks_alloc(&tcp_hashinfo))
43978c2ecf20Sopenharmony_ci		panic("TCP: failed to alloc ehash_locks");
43988c2ecf20Sopenharmony_ci	tcp_hashinfo.bhash =
43998c2ecf20Sopenharmony_ci		alloc_large_system_hash("TCP bind",
44008c2ecf20Sopenharmony_ci					sizeof(struct inet_bind_hashbucket),
44018c2ecf20Sopenharmony_ci					tcp_hashinfo.ehash_mask + 1,
44028c2ecf20Sopenharmony_ci					17, /* one slot per 128 KB of memory */
44038c2ecf20Sopenharmony_ci					0,
44048c2ecf20Sopenharmony_ci					&tcp_hashinfo.bhash_size,
44058c2ecf20Sopenharmony_ci					NULL,
44068c2ecf20Sopenharmony_ci					0,
44078c2ecf20Sopenharmony_ci					64 * 1024);
44088c2ecf20Sopenharmony_ci	tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
44098c2ecf20Sopenharmony_ci	for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
44108c2ecf20Sopenharmony_ci		spin_lock_init(&tcp_hashinfo.bhash[i].lock);
44118c2ecf20Sopenharmony_ci		INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
44128c2ecf20Sopenharmony_ci	}
44138c2ecf20Sopenharmony_ci
44148c2ecf20Sopenharmony_ci
44158c2ecf20Sopenharmony_ci	cnt = tcp_hashinfo.ehash_mask + 1;
44168c2ecf20Sopenharmony_ci	sysctl_tcp_max_orphans = cnt / 2;
44178c2ecf20Sopenharmony_ci
44188c2ecf20Sopenharmony_ci	tcp_init_mem();
44198c2ecf20Sopenharmony_ci	/* Set per-socket limits to no more than 1/128 the pressure threshold */
44208c2ecf20Sopenharmony_ci	limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
44218c2ecf20Sopenharmony_ci	max_wshare = min(4UL*1024*1024, limit);
44228c2ecf20Sopenharmony_ci	max_rshare = min(6UL*1024*1024, limit);
44238c2ecf20Sopenharmony_ci
44248c2ecf20Sopenharmony_ci	init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
44258c2ecf20Sopenharmony_ci	init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;
44268c2ecf20Sopenharmony_ci	init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
44278c2ecf20Sopenharmony_ci
44288c2ecf20Sopenharmony_ci	init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
44298c2ecf20Sopenharmony_ci	init_net.ipv4.sysctl_tcp_rmem[1] = 131072;
44308c2ecf20Sopenharmony_ci	init_net.ipv4.sysctl_tcp_rmem[2] = max(131072, max_rshare);
44318c2ecf20Sopenharmony_ci
44328c2ecf20Sopenharmony_ci	pr_info("Hash tables configured (established %u bind %u)\n",
44338c2ecf20Sopenharmony_ci		tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
44348c2ecf20Sopenharmony_ci
44358c2ecf20Sopenharmony_ci	tcp_v4_init();
44368c2ecf20Sopenharmony_ci	tcp_metrics_init();
44378c2ecf20Sopenharmony_ci	BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
44388c2ecf20Sopenharmony_ci	tcp_tasklet_init();
44398c2ecf20Sopenharmony_ci	mptcp_init();
44408c2ecf20Sopenharmony_ci}
4441