162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
362306a36Sopenharmony_ci
462306a36Sopenharmony_ci#include <linux/skmsg.h>
562306a36Sopenharmony_ci#include <linux/skbuff.h>
662306a36Sopenharmony_ci#include <linux/scatterlist.h>
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <net/sock.h>
962306a36Sopenharmony_ci#include <net/tcp.h>
1062306a36Sopenharmony_ci#include <net/tls.h>
1162306a36Sopenharmony_ci#include <trace/events/sock.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_cistatic bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
1462306a36Sopenharmony_ci{
1562306a36Sopenharmony_ci	if (msg->sg.end > msg->sg.start &&
1662306a36Sopenharmony_ci	    elem_first_coalesce < msg->sg.end)
1762306a36Sopenharmony_ci		return true;
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci	if (msg->sg.end < msg->sg.start &&
2062306a36Sopenharmony_ci	    (elem_first_coalesce > msg->sg.start ||
2162306a36Sopenharmony_ci	     elem_first_coalesce < msg->sg.end))
2262306a36Sopenharmony_ci		return true;
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci	return false;
2562306a36Sopenharmony_ci}
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_ciint sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
2862306a36Sopenharmony_ci		 int elem_first_coalesce)
2962306a36Sopenharmony_ci{
3062306a36Sopenharmony_ci	struct page_frag *pfrag = sk_page_frag(sk);
3162306a36Sopenharmony_ci	u32 osize = msg->sg.size;
3262306a36Sopenharmony_ci	int ret = 0;
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci	len -= msg->sg.size;
3562306a36Sopenharmony_ci	while (len > 0) {
3662306a36Sopenharmony_ci		struct scatterlist *sge;
3762306a36Sopenharmony_ci		u32 orig_offset;
3862306a36Sopenharmony_ci		int use, i;
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci		if (!sk_page_frag_refill(sk, pfrag)) {
4162306a36Sopenharmony_ci			ret = -ENOMEM;
4262306a36Sopenharmony_ci			goto msg_trim;
4362306a36Sopenharmony_ci		}
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci		orig_offset = pfrag->offset;
4662306a36Sopenharmony_ci		use = min_t(int, len, pfrag->size - orig_offset);
4762306a36Sopenharmony_ci		if (!sk_wmem_schedule(sk, use)) {
4862306a36Sopenharmony_ci			ret = -ENOMEM;
4962306a36Sopenharmony_ci			goto msg_trim;
5062306a36Sopenharmony_ci		}
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci		i = msg->sg.end;
5362306a36Sopenharmony_ci		sk_msg_iter_var_prev(i);
5462306a36Sopenharmony_ci		sge = &msg->sg.data[i];
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci		if (sk_msg_try_coalesce_ok(msg, elem_first_coalesce) &&
5762306a36Sopenharmony_ci		    sg_page(sge) == pfrag->page &&
5862306a36Sopenharmony_ci		    sge->offset + sge->length == orig_offset) {
5962306a36Sopenharmony_ci			sge->length += use;
6062306a36Sopenharmony_ci		} else {
6162306a36Sopenharmony_ci			if (sk_msg_full(msg)) {
6262306a36Sopenharmony_ci				ret = -ENOSPC;
6362306a36Sopenharmony_ci				break;
6462306a36Sopenharmony_ci			}
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci			sge = &msg->sg.data[msg->sg.end];
6762306a36Sopenharmony_ci			sg_unmark_end(sge);
6862306a36Sopenharmony_ci			sg_set_page(sge, pfrag->page, use, orig_offset);
6962306a36Sopenharmony_ci			get_page(pfrag->page);
7062306a36Sopenharmony_ci			sk_msg_iter_next(msg, end);
7162306a36Sopenharmony_ci		}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci		sk_mem_charge(sk, use);
7462306a36Sopenharmony_ci		msg->sg.size += use;
7562306a36Sopenharmony_ci		pfrag->offset += use;
7662306a36Sopenharmony_ci		len -= use;
7762306a36Sopenharmony_ci	}
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	return ret;
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_cimsg_trim:
8262306a36Sopenharmony_ci	sk_msg_trim(sk, msg, osize);
8362306a36Sopenharmony_ci	return ret;
8462306a36Sopenharmony_ci}
8562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_alloc);
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ciint sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
8862306a36Sopenharmony_ci		 u32 off, u32 len)
8962306a36Sopenharmony_ci{
9062306a36Sopenharmony_ci	int i = src->sg.start;
9162306a36Sopenharmony_ci	struct scatterlist *sge = sk_msg_elem(src, i);
9262306a36Sopenharmony_ci	struct scatterlist *sgd = NULL;
9362306a36Sopenharmony_ci	u32 sge_len, sge_off;
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	while (off) {
9662306a36Sopenharmony_ci		if (sge->length > off)
9762306a36Sopenharmony_ci			break;
9862306a36Sopenharmony_ci		off -= sge->length;
9962306a36Sopenharmony_ci		sk_msg_iter_var_next(i);
10062306a36Sopenharmony_ci		if (i == src->sg.end && off)
10162306a36Sopenharmony_ci			return -ENOSPC;
10262306a36Sopenharmony_ci		sge = sk_msg_elem(src, i);
10362306a36Sopenharmony_ci	}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	while (len) {
10662306a36Sopenharmony_ci		sge_len = sge->length - off;
10762306a36Sopenharmony_ci		if (sge_len > len)
10862306a36Sopenharmony_ci			sge_len = len;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci		if (dst->sg.end)
11162306a36Sopenharmony_ci			sgd = sk_msg_elem(dst, dst->sg.end - 1);
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci		if (sgd &&
11462306a36Sopenharmony_ci		    (sg_page(sge) == sg_page(sgd)) &&
11562306a36Sopenharmony_ci		    (sg_virt(sge) + off == sg_virt(sgd) + sgd->length)) {
11662306a36Sopenharmony_ci			sgd->length += sge_len;
11762306a36Sopenharmony_ci			dst->sg.size += sge_len;
11862306a36Sopenharmony_ci		} else if (!sk_msg_full(dst)) {
11962306a36Sopenharmony_ci			sge_off = sge->offset + off;
12062306a36Sopenharmony_ci			sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off);
12162306a36Sopenharmony_ci		} else {
12262306a36Sopenharmony_ci			return -ENOSPC;
12362306a36Sopenharmony_ci		}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci		off = 0;
12662306a36Sopenharmony_ci		len -= sge_len;
12762306a36Sopenharmony_ci		sk_mem_charge(sk, sge_len);
12862306a36Sopenharmony_ci		sk_msg_iter_var_next(i);
12962306a36Sopenharmony_ci		if (i == src->sg.end && len)
13062306a36Sopenharmony_ci			return -ENOSPC;
13162306a36Sopenharmony_ci		sge = sk_msg_elem(src, i);
13262306a36Sopenharmony_ci	}
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	return 0;
13562306a36Sopenharmony_ci}
13662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_clone);
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_civoid sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes)
13962306a36Sopenharmony_ci{
14062306a36Sopenharmony_ci	int i = msg->sg.start;
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	do {
14362306a36Sopenharmony_ci		struct scatterlist *sge = sk_msg_elem(msg, i);
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci		if (bytes < sge->length) {
14662306a36Sopenharmony_ci			sge->length -= bytes;
14762306a36Sopenharmony_ci			sge->offset += bytes;
14862306a36Sopenharmony_ci			sk_mem_uncharge(sk, bytes);
14962306a36Sopenharmony_ci			break;
15062306a36Sopenharmony_ci		}
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci		sk_mem_uncharge(sk, sge->length);
15362306a36Sopenharmony_ci		bytes -= sge->length;
15462306a36Sopenharmony_ci		sge->length = 0;
15562306a36Sopenharmony_ci		sge->offset = 0;
15662306a36Sopenharmony_ci		sk_msg_iter_var_next(i);
15762306a36Sopenharmony_ci	} while (bytes && i != msg->sg.end);
15862306a36Sopenharmony_ci	msg->sg.start = i;
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_return_zero);
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_civoid sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes)
16362306a36Sopenharmony_ci{
16462306a36Sopenharmony_ci	int i = msg->sg.start;
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	do {
16762306a36Sopenharmony_ci		struct scatterlist *sge = &msg->sg.data[i];
16862306a36Sopenharmony_ci		int uncharge = (bytes < sge->length) ? bytes : sge->length;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci		sk_mem_uncharge(sk, uncharge);
17162306a36Sopenharmony_ci		bytes -= uncharge;
17262306a36Sopenharmony_ci		sk_msg_iter_var_next(i);
17362306a36Sopenharmony_ci	} while (i != msg->sg.end);
17462306a36Sopenharmony_ci}
17562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_return);
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_cistatic int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
17862306a36Sopenharmony_ci			    bool charge)
17962306a36Sopenharmony_ci{
18062306a36Sopenharmony_ci	struct scatterlist *sge = sk_msg_elem(msg, i);
18162306a36Sopenharmony_ci	u32 len = sge->length;
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	/* When the skb owns the memory we free it from consume_skb path. */
18462306a36Sopenharmony_ci	if (!msg->skb) {
18562306a36Sopenharmony_ci		if (charge)
18662306a36Sopenharmony_ci			sk_mem_uncharge(sk, len);
18762306a36Sopenharmony_ci		put_page(sg_page(sge));
18862306a36Sopenharmony_ci	}
18962306a36Sopenharmony_ci	memset(sge, 0, sizeof(*sge));
19062306a36Sopenharmony_ci	return len;
19162306a36Sopenharmony_ci}
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_cistatic int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i,
19462306a36Sopenharmony_ci			 bool charge)
19562306a36Sopenharmony_ci{
19662306a36Sopenharmony_ci	struct scatterlist *sge = sk_msg_elem(msg, i);
19762306a36Sopenharmony_ci	int freed = 0;
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	while (msg->sg.size) {
20062306a36Sopenharmony_ci		msg->sg.size -= sge->length;
20162306a36Sopenharmony_ci		freed += sk_msg_free_elem(sk, msg, i, charge);
20262306a36Sopenharmony_ci		sk_msg_iter_var_next(i);
20362306a36Sopenharmony_ci		sk_msg_check_to_free(msg, i, msg->sg.size);
20462306a36Sopenharmony_ci		sge = sk_msg_elem(msg, i);
20562306a36Sopenharmony_ci	}
20662306a36Sopenharmony_ci	consume_skb(msg->skb);
20762306a36Sopenharmony_ci	sk_msg_init(msg);
20862306a36Sopenharmony_ci	return freed;
20962306a36Sopenharmony_ci}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ciint sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg)
21262306a36Sopenharmony_ci{
21362306a36Sopenharmony_ci	return __sk_msg_free(sk, msg, msg->sg.start, false);
21462306a36Sopenharmony_ci}
21562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_free_nocharge);
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ciint sk_msg_free(struct sock *sk, struct sk_msg *msg)
21862306a36Sopenharmony_ci{
21962306a36Sopenharmony_ci	return __sk_msg_free(sk, msg, msg->sg.start, true);
22062306a36Sopenharmony_ci}
22162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_free);
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_cistatic void __sk_msg_free_partial(struct sock *sk, struct sk_msg *msg,
22462306a36Sopenharmony_ci				  u32 bytes, bool charge)
22562306a36Sopenharmony_ci{
22662306a36Sopenharmony_ci	struct scatterlist *sge;
22762306a36Sopenharmony_ci	u32 i = msg->sg.start;
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci	while (bytes) {
23062306a36Sopenharmony_ci		sge = sk_msg_elem(msg, i);
23162306a36Sopenharmony_ci		if (!sge->length)
23262306a36Sopenharmony_ci			break;
23362306a36Sopenharmony_ci		if (bytes < sge->length) {
23462306a36Sopenharmony_ci			if (charge)
23562306a36Sopenharmony_ci				sk_mem_uncharge(sk, bytes);
23662306a36Sopenharmony_ci			sge->length -= bytes;
23762306a36Sopenharmony_ci			sge->offset += bytes;
23862306a36Sopenharmony_ci			msg->sg.size -= bytes;
23962306a36Sopenharmony_ci			break;
24062306a36Sopenharmony_ci		}
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci		msg->sg.size -= sge->length;
24362306a36Sopenharmony_ci		bytes -= sge->length;
24462306a36Sopenharmony_ci		sk_msg_free_elem(sk, msg, i, charge);
24562306a36Sopenharmony_ci		sk_msg_iter_var_next(i);
24662306a36Sopenharmony_ci		sk_msg_check_to_free(msg, i, bytes);
24762306a36Sopenharmony_ci	}
24862306a36Sopenharmony_ci	msg->sg.start = i;
24962306a36Sopenharmony_ci}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_civoid sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes)
25262306a36Sopenharmony_ci{
25362306a36Sopenharmony_ci	__sk_msg_free_partial(sk, msg, bytes, true);
25462306a36Sopenharmony_ci}
25562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_free_partial);
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_civoid sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
25862306a36Sopenharmony_ci				  u32 bytes)
25962306a36Sopenharmony_ci{
26062306a36Sopenharmony_ci	__sk_msg_free_partial(sk, msg, bytes, false);
26162306a36Sopenharmony_ci}
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_civoid sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len)
26462306a36Sopenharmony_ci{
26562306a36Sopenharmony_ci	int trim = msg->sg.size - len;
26662306a36Sopenharmony_ci	u32 i = msg->sg.end;
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	if (trim <= 0) {
26962306a36Sopenharmony_ci		WARN_ON(trim < 0);
27062306a36Sopenharmony_ci		return;
27162306a36Sopenharmony_ci	}
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_ci	sk_msg_iter_var_prev(i);
27462306a36Sopenharmony_ci	msg->sg.size = len;
27562306a36Sopenharmony_ci	while (msg->sg.data[i].length &&
27662306a36Sopenharmony_ci	       trim >= msg->sg.data[i].length) {
27762306a36Sopenharmony_ci		trim -= msg->sg.data[i].length;
27862306a36Sopenharmony_ci		sk_msg_free_elem(sk, msg, i, true);
27962306a36Sopenharmony_ci		sk_msg_iter_var_prev(i);
28062306a36Sopenharmony_ci		if (!trim)
28162306a36Sopenharmony_ci			goto out;
28262306a36Sopenharmony_ci	}
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci	msg->sg.data[i].length -= trim;
28562306a36Sopenharmony_ci	sk_mem_uncharge(sk, trim);
28662306a36Sopenharmony_ci	/* Adjust copybreak if it falls into the trimmed part of last buf */
28762306a36Sopenharmony_ci	if (msg->sg.curr == i && msg->sg.copybreak > msg->sg.data[i].length)
28862306a36Sopenharmony_ci		msg->sg.copybreak = msg->sg.data[i].length;
28962306a36Sopenharmony_ciout:
29062306a36Sopenharmony_ci	sk_msg_iter_var_next(i);
29162306a36Sopenharmony_ci	msg->sg.end = i;
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	/* If we trim data a full sg elem before curr pointer update
29462306a36Sopenharmony_ci	 * copybreak and current so that any future copy operations
29562306a36Sopenharmony_ci	 * start at new copy location.
29662306a36Sopenharmony_ci	 * However trimed data that has not yet been used in a copy op
29762306a36Sopenharmony_ci	 * does not require an update.
29862306a36Sopenharmony_ci	 */
29962306a36Sopenharmony_ci	if (!msg->sg.size) {
30062306a36Sopenharmony_ci		msg->sg.curr = msg->sg.start;
30162306a36Sopenharmony_ci		msg->sg.copybreak = 0;
30262306a36Sopenharmony_ci	} else if (sk_msg_iter_dist(msg->sg.start, msg->sg.curr) >=
30362306a36Sopenharmony_ci		   sk_msg_iter_dist(msg->sg.start, msg->sg.end)) {
30462306a36Sopenharmony_ci		sk_msg_iter_var_prev(i);
30562306a36Sopenharmony_ci		msg->sg.curr = i;
30662306a36Sopenharmony_ci		msg->sg.copybreak = msg->sg.data[i].length;
30762306a36Sopenharmony_ci	}
30862306a36Sopenharmony_ci}
30962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_trim);
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ciint sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
31262306a36Sopenharmony_ci			      struct sk_msg *msg, u32 bytes)
31362306a36Sopenharmony_ci{
31462306a36Sopenharmony_ci	int i, maxpages, ret = 0, num_elems = sk_msg_elem_used(msg);
31562306a36Sopenharmony_ci	const int to_max_pages = MAX_MSG_FRAGS;
31662306a36Sopenharmony_ci	struct page *pages[MAX_MSG_FRAGS];
31762306a36Sopenharmony_ci	ssize_t orig, copied, use, offset;
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	orig = msg->sg.size;
32062306a36Sopenharmony_ci	while (bytes > 0) {
32162306a36Sopenharmony_ci		i = 0;
32262306a36Sopenharmony_ci		maxpages = to_max_pages - num_elems;
32362306a36Sopenharmony_ci		if (maxpages == 0) {
32462306a36Sopenharmony_ci			ret = -EFAULT;
32562306a36Sopenharmony_ci			goto out;
32662306a36Sopenharmony_ci		}
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci		copied = iov_iter_get_pages2(from, pages, bytes, maxpages,
32962306a36Sopenharmony_ci					    &offset);
33062306a36Sopenharmony_ci		if (copied <= 0) {
33162306a36Sopenharmony_ci			ret = -EFAULT;
33262306a36Sopenharmony_ci			goto out;
33362306a36Sopenharmony_ci		}
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci		bytes -= copied;
33662306a36Sopenharmony_ci		msg->sg.size += copied;
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci		while (copied) {
33962306a36Sopenharmony_ci			use = min_t(int, copied, PAGE_SIZE - offset);
34062306a36Sopenharmony_ci			sg_set_page(&msg->sg.data[msg->sg.end],
34162306a36Sopenharmony_ci				    pages[i], use, offset);
34262306a36Sopenharmony_ci			sg_unmark_end(&msg->sg.data[msg->sg.end]);
34362306a36Sopenharmony_ci			sk_mem_charge(sk, use);
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ci			offset = 0;
34662306a36Sopenharmony_ci			copied -= use;
34762306a36Sopenharmony_ci			sk_msg_iter_next(msg, end);
34862306a36Sopenharmony_ci			num_elems++;
34962306a36Sopenharmony_ci			i++;
35062306a36Sopenharmony_ci		}
35162306a36Sopenharmony_ci		/* When zerocopy is mixed with sk_msg_*copy* operations we
35262306a36Sopenharmony_ci		 * may have a copybreak set in this case clear and prefer
35362306a36Sopenharmony_ci		 * zerocopy remainder when possible.
35462306a36Sopenharmony_ci		 */
35562306a36Sopenharmony_ci		msg->sg.copybreak = 0;
35662306a36Sopenharmony_ci		msg->sg.curr = msg->sg.end;
35762306a36Sopenharmony_ci	}
35862306a36Sopenharmony_ciout:
35962306a36Sopenharmony_ci	/* Revert iov_iter updates, msg will need to use 'trim' later if it
36062306a36Sopenharmony_ci	 * also needs to be cleared.
36162306a36Sopenharmony_ci	 */
36262306a36Sopenharmony_ci	if (ret)
36362306a36Sopenharmony_ci		iov_iter_revert(from, msg->sg.size - orig);
36462306a36Sopenharmony_ci	return ret;
36562306a36Sopenharmony_ci}
36662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_zerocopy_from_iter);
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ciint sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
36962306a36Sopenharmony_ci			     struct sk_msg *msg, u32 bytes)
37062306a36Sopenharmony_ci{
37162306a36Sopenharmony_ci	int ret = -ENOSPC, i = msg->sg.curr;
37262306a36Sopenharmony_ci	struct scatterlist *sge;
37362306a36Sopenharmony_ci	u32 copy, buf_size;
37462306a36Sopenharmony_ci	void *to;
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_ci	do {
37762306a36Sopenharmony_ci		sge = sk_msg_elem(msg, i);
37862306a36Sopenharmony_ci		/* This is possible if a trim operation shrunk the buffer */
37962306a36Sopenharmony_ci		if (msg->sg.copybreak >= sge->length) {
38062306a36Sopenharmony_ci			msg->sg.copybreak = 0;
38162306a36Sopenharmony_ci			sk_msg_iter_var_next(i);
38262306a36Sopenharmony_ci			if (i == msg->sg.end)
38362306a36Sopenharmony_ci				break;
38462306a36Sopenharmony_ci			sge = sk_msg_elem(msg, i);
38562306a36Sopenharmony_ci		}
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci		buf_size = sge->length - msg->sg.copybreak;
38862306a36Sopenharmony_ci		copy = (buf_size > bytes) ? bytes : buf_size;
38962306a36Sopenharmony_ci		to = sg_virt(sge) + msg->sg.copybreak;
39062306a36Sopenharmony_ci		msg->sg.copybreak += copy;
39162306a36Sopenharmony_ci		if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
39262306a36Sopenharmony_ci			ret = copy_from_iter_nocache(to, copy, from);
39362306a36Sopenharmony_ci		else
39462306a36Sopenharmony_ci			ret = copy_from_iter(to, copy, from);
39562306a36Sopenharmony_ci		if (ret != copy) {
39662306a36Sopenharmony_ci			ret = -EFAULT;
39762306a36Sopenharmony_ci			goto out;
39862306a36Sopenharmony_ci		}
39962306a36Sopenharmony_ci		bytes -= copy;
40062306a36Sopenharmony_ci		if (!bytes)
40162306a36Sopenharmony_ci			break;
40262306a36Sopenharmony_ci		msg->sg.copybreak = 0;
40362306a36Sopenharmony_ci		sk_msg_iter_var_next(i);
40462306a36Sopenharmony_ci	} while (i != msg->sg.end);
40562306a36Sopenharmony_ciout:
40662306a36Sopenharmony_ci	msg->sg.curr = i;
40762306a36Sopenharmony_ci	return ret;
40862306a36Sopenharmony_ci}
40962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_ci/* Receive sk_msg from psock->ingress_msg to @msg. */
41262306a36Sopenharmony_ciint sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
41362306a36Sopenharmony_ci		   int len, int flags)
41462306a36Sopenharmony_ci{
41562306a36Sopenharmony_ci	struct iov_iter *iter = &msg->msg_iter;
41662306a36Sopenharmony_ci	int peek = flags & MSG_PEEK;
41762306a36Sopenharmony_ci	struct sk_msg *msg_rx;
41862306a36Sopenharmony_ci	int i, copied = 0;
41962306a36Sopenharmony_ci
42062306a36Sopenharmony_ci	msg_rx = sk_psock_peek_msg(psock);
42162306a36Sopenharmony_ci	while (copied != len) {
42262306a36Sopenharmony_ci		struct scatterlist *sge;
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_ci		if (unlikely(!msg_rx))
42562306a36Sopenharmony_ci			break;
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci		i = msg_rx->sg.start;
42862306a36Sopenharmony_ci		do {
42962306a36Sopenharmony_ci			struct page *page;
43062306a36Sopenharmony_ci			int copy;
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci			sge = sk_msg_elem(msg_rx, i);
43362306a36Sopenharmony_ci			copy = sge->length;
43462306a36Sopenharmony_ci			page = sg_page(sge);
43562306a36Sopenharmony_ci			if (copied + copy > len)
43662306a36Sopenharmony_ci				copy = len - copied;
43762306a36Sopenharmony_ci			copy = copy_page_to_iter(page, sge->offset, copy, iter);
43862306a36Sopenharmony_ci			if (!copy) {
43962306a36Sopenharmony_ci				copied = copied ? copied : -EFAULT;
44062306a36Sopenharmony_ci				goto out;
44162306a36Sopenharmony_ci			}
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci			copied += copy;
44462306a36Sopenharmony_ci			if (likely(!peek)) {
44562306a36Sopenharmony_ci				sge->offset += copy;
44662306a36Sopenharmony_ci				sge->length -= copy;
44762306a36Sopenharmony_ci				if (!msg_rx->skb)
44862306a36Sopenharmony_ci					sk_mem_uncharge(sk, copy);
44962306a36Sopenharmony_ci				msg_rx->sg.size -= copy;
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci				if (!sge->length) {
45262306a36Sopenharmony_ci					sk_msg_iter_var_next(i);
45362306a36Sopenharmony_ci					if (!msg_rx->skb)
45462306a36Sopenharmony_ci						put_page(page);
45562306a36Sopenharmony_ci				}
45662306a36Sopenharmony_ci			} else {
45762306a36Sopenharmony_ci				/* Lets not optimize peek case if copy_page_to_iter
45862306a36Sopenharmony_ci				 * didn't copy the entire length lets just break.
45962306a36Sopenharmony_ci				 */
46062306a36Sopenharmony_ci				if (copy != sge->length)
46162306a36Sopenharmony_ci					goto out;
46262306a36Sopenharmony_ci				sk_msg_iter_var_next(i);
46362306a36Sopenharmony_ci			}
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci			if (copied == len)
46662306a36Sopenharmony_ci				break;
46762306a36Sopenharmony_ci		} while ((i != msg_rx->sg.end) && !sg_is_last(sge));
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci		if (unlikely(peek)) {
47062306a36Sopenharmony_ci			msg_rx = sk_psock_next_msg(psock, msg_rx);
47162306a36Sopenharmony_ci			if (!msg_rx)
47262306a36Sopenharmony_ci				break;
47362306a36Sopenharmony_ci			continue;
47462306a36Sopenharmony_ci		}
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci		msg_rx->sg.start = i;
47762306a36Sopenharmony_ci		if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) {
47862306a36Sopenharmony_ci			msg_rx = sk_psock_dequeue_msg(psock);
47962306a36Sopenharmony_ci			kfree_sk_msg(msg_rx);
48062306a36Sopenharmony_ci		}
48162306a36Sopenharmony_ci		msg_rx = sk_psock_peek_msg(psock);
48262306a36Sopenharmony_ci	}
48362306a36Sopenharmony_ciout:
48462306a36Sopenharmony_ci	return copied;
48562306a36Sopenharmony_ci}
48662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_recvmsg);
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_cibool sk_msg_is_readable(struct sock *sk)
48962306a36Sopenharmony_ci{
49062306a36Sopenharmony_ci	struct sk_psock *psock;
49162306a36Sopenharmony_ci	bool empty = true;
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci	rcu_read_lock();
49462306a36Sopenharmony_ci	psock = sk_psock(sk);
49562306a36Sopenharmony_ci	if (likely(psock))
49662306a36Sopenharmony_ci		empty = list_empty(&psock->ingress_msg);
49762306a36Sopenharmony_ci	rcu_read_unlock();
49862306a36Sopenharmony_ci	return !empty;
49962306a36Sopenharmony_ci}
50062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_is_readable);
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_cistatic struct sk_msg *alloc_sk_msg(gfp_t gfp)
50362306a36Sopenharmony_ci{
50462306a36Sopenharmony_ci	struct sk_msg *msg;
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	msg = kzalloc(sizeof(*msg), gfp | __GFP_NOWARN);
50762306a36Sopenharmony_ci	if (unlikely(!msg))
50862306a36Sopenharmony_ci		return NULL;
50962306a36Sopenharmony_ci	sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
51062306a36Sopenharmony_ci	return msg;
51162306a36Sopenharmony_ci}
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_cistatic struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
51462306a36Sopenharmony_ci						  struct sk_buff *skb)
51562306a36Sopenharmony_ci{
51662306a36Sopenharmony_ci	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
51762306a36Sopenharmony_ci		return NULL;
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci	if (!sk_rmem_schedule(sk, skb, skb->truesize))
52062306a36Sopenharmony_ci		return NULL;
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	return alloc_sk_msg(GFP_KERNEL);
52362306a36Sopenharmony_ci}
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_cistatic int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
52662306a36Sopenharmony_ci					u32 off, u32 len,
52762306a36Sopenharmony_ci					struct sk_psock *psock,
52862306a36Sopenharmony_ci					struct sock *sk,
52962306a36Sopenharmony_ci					struct sk_msg *msg)
53062306a36Sopenharmony_ci{
53162306a36Sopenharmony_ci	int num_sge, copied;
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci	num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
53462306a36Sopenharmony_ci	if (num_sge < 0) {
53562306a36Sopenharmony_ci		/* skb linearize may fail with ENOMEM, but lets simply try again
53662306a36Sopenharmony_ci		 * later if this happens. Under memory pressure we don't want to
53762306a36Sopenharmony_ci		 * drop the skb. We need to linearize the skb so that the mapping
53862306a36Sopenharmony_ci		 * in skb_to_sgvec can not error.
53962306a36Sopenharmony_ci		 */
54062306a36Sopenharmony_ci		if (skb_linearize(skb))
54162306a36Sopenharmony_ci			return -EAGAIN;
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci		num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
54462306a36Sopenharmony_ci		if (unlikely(num_sge < 0))
54562306a36Sopenharmony_ci			return num_sge;
54662306a36Sopenharmony_ci	}
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci	copied = len;
54962306a36Sopenharmony_ci	msg->sg.start = 0;
55062306a36Sopenharmony_ci	msg->sg.size = copied;
55162306a36Sopenharmony_ci	msg->sg.end = num_sge;
55262306a36Sopenharmony_ci	msg->skb = skb;
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ci	sk_psock_queue_msg(psock, msg);
55562306a36Sopenharmony_ci	sk_psock_data_ready(sk, psock);
55662306a36Sopenharmony_ci	return copied;
55762306a36Sopenharmony_ci}
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_cistatic int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
56062306a36Sopenharmony_ci				     u32 off, u32 len);
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_cistatic int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
56362306a36Sopenharmony_ci				u32 off, u32 len)
56462306a36Sopenharmony_ci{
56562306a36Sopenharmony_ci	struct sock *sk = psock->sk;
56662306a36Sopenharmony_ci	struct sk_msg *msg;
56762306a36Sopenharmony_ci	int err;
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci	/* If we are receiving on the same sock skb->sk is already assigned,
57062306a36Sopenharmony_ci	 * skip memory accounting and owner transition seeing it already set
57162306a36Sopenharmony_ci	 * correctly.
57262306a36Sopenharmony_ci	 */
57362306a36Sopenharmony_ci	if (unlikely(skb->sk == sk))
57462306a36Sopenharmony_ci		return sk_psock_skb_ingress_self(psock, skb, off, len);
57562306a36Sopenharmony_ci	msg = sk_psock_create_ingress_msg(sk, skb);
57662306a36Sopenharmony_ci	if (!msg)
57762306a36Sopenharmony_ci		return -EAGAIN;
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci	/* This will transition ownership of the data from the socket where
58062306a36Sopenharmony_ci	 * the BPF program was run initiating the redirect to the socket
58162306a36Sopenharmony_ci	 * we will eventually receive this data on. The data will be released
58262306a36Sopenharmony_ci	 * from skb_consume found in __tcp_bpf_recvmsg() after its been copied
58362306a36Sopenharmony_ci	 * into user buffers.
58462306a36Sopenharmony_ci	 */
58562306a36Sopenharmony_ci	skb_set_owner_r(skb, sk);
58662306a36Sopenharmony_ci	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
58762306a36Sopenharmony_ci	if (err < 0)
58862306a36Sopenharmony_ci		kfree(msg);
58962306a36Sopenharmony_ci	return err;
59062306a36Sopenharmony_ci}
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ci/* Puts an skb on the ingress queue of the socket already assigned to the
59362306a36Sopenharmony_ci * skb. In this case we do not need to check memory limits or skb_set_owner_r
59462306a36Sopenharmony_ci * because the skb is already accounted for here.
59562306a36Sopenharmony_ci */
59662306a36Sopenharmony_cistatic int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
59762306a36Sopenharmony_ci				     u32 off, u32 len)
59862306a36Sopenharmony_ci{
59962306a36Sopenharmony_ci	struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
60062306a36Sopenharmony_ci	struct sock *sk = psock->sk;
60162306a36Sopenharmony_ci	int err;
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci	if (unlikely(!msg))
60462306a36Sopenharmony_ci		return -EAGAIN;
60562306a36Sopenharmony_ci	skb_set_owner_r(skb, sk);
60662306a36Sopenharmony_ci	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
60762306a36Sopenharmony_ci	if (err < 0)
60862306a36Sopenharmony_ci		kfree(msg);
60962306a36Sopenharmony_ci	return err;
61062306a36Sopenharmony_ci}
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_cistatic int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
61362306a36Sopenharmony_ci			       u32 off, u32 len, bool ingress)
61462306a36Sopenharmony_ci{
61562306a36Sopenharmony_ci	int err = 0;
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	if (!ingress) {
61862306a36Sopenharmony_ci		if (!sock_writeable(psock->sk))
61962306a36Sopenharmony_ci			return -EAGAIN;
62062306a36Sopenharmony_ci		return skb_send_sock(psock->sk, skb, off, len);
62162306a36Sopenharmony_ci	}
62262306a36Sopenharmony_ci	skb_get(skb);
62362306a36Sopenharmony_ci	err = sk_psock_skb_ingress(psock, skb, off, len);
62462306a36Sopenharmony_ci	if (err < 0)
62562306a36Sopenharmony_ci		kfree_skb(skb);
62662306a36Sopenharmony_ci	return err;
62762306a36Sopenharmony_ci}
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_cistatic void sk_psock_skb_state(struct sk_psock *psock,
63062306a36Sopenharmony_ci			       struct sk_psock_work_state *state,
63162306a36Sopenharmony_ci			       int len, int off)
63262306a36Sopenharmony_ci{
63362306a36Sopenharmony_ci	spin_lock_bh(&psock->ingress_lock);
63462306a36Sopenharmony_ci	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
63562306a36Sopenharmony_ci		state->len = len;
63662306a36Sopenharmony_ci		state->off = off;
63762306a36Sopenharmony_ci	}
63862306a36Sopenharmony_ci	spin_unlock_bh(&psock->ingress_lock);
63962306a36Sopenharmony_ci}
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_cistatic void sk_psock_backlog(struct work_struct *work)
64262306a36Sopenharmony_ci{
64362306a36Sopenharmony_ci	struct delayed_work *dwork = to_delayed_work(work);
64462306a36Sopenharmony_ci	struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
64562306a36Sopenharmony_ci	struct sk_psock_work_state *state = &psock->work_state;
64662306a36Sopenharmony_ci	struct sk_buff *skb = NULL;
64762306a36Sopenharmony_ci	u32 len = 0, off = 0;
64862306a36Sopenharmony_ci	bool ingress;
64962306a36Sopenharmony_ci	int ret;
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci	mutex_lock(&psock->work_mutex);
65262306a36Sopenharmony_ci	if (unlikely(state->len)) {
65362306a36Sopenharmony_ci		len = state->len;
65462306a36Sopenharmony_ci		off = state->off;
65562306a36Sopenharmony_ci	}
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_ci	while ((skb = skb_peek(&psock->ingress_skb))) {
65862306a36Sopenharmony_ci		len = skb->len;
65962306a36Sopenharmony_ci		off = 0;
66062306a36Sopenharmony_ci		if (skb_bpf_strparser(skb)) {
66162306a36Sopenharmony_ci			struct strp_msg *stm = strp_msg(skb);
66262306a36Sopenharmony_ci
66362306a36Sopenharmony_ci			off = stm->offset;
66462306a36Sopenharmony_ci			len = stm->full_len;
66562306a36Sopenharmony_ci		}
66662306a36Sopenharmony_ci		ingress = skb_bpf_ingress(skb);
66762306a36Sopenharmony_ci		skb_bpf_redirect_clear(skb);
66862306a36Sopenharmony_ci		do {
66962306a36Sopenharmony_ci			ret = -EIO;
67062306a36Sopenharmony_ci			if (!sock_flag(psock->sk, SOCK_DEAD))
67162306a36Sopenharmony_ci				ret = sk_psock_handle_skb(psock, skb, off,
67262306a36Sopenharmony_ci							  len, ingress);
67362306a36Sopenharmony_ci			if (ret <= 0) {
67462306a36Sopenharmony_ci				if (ret == -EAGAIN) {
67562306a36Sopenharmony_ci					sk_psock_skb_state(psock, state, len, off);
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ci					/* Delay slightly to prioritize any
67862306a36Sopenharmony_ci					 * other work that might be here.
67962306a36Sopenharmony_ci					 */
68062306a36Sopenharmony_ci					if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
68162306a36Sopenharmony_ci						schedule_delayed_work(&psock->work, 1);
68262306a36Sopenharmony_ci					goto end;
68362306a36Sopenharmony_ci				}
68462306a36Sopenharmony_ci				/* Hard errors break pipe and stop xmit. */
68562306a36Sopenharmony_ci				sk_psock_report_error(psock, ret ? -ret : EPIPE);
68662306a36Sopenharmony_ci				sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
68762306a36Sopenharmony_ci				goto end;
68862306a36Sopenharmony_ci			}
68962306a36Sopenharmony_ci			off += ret;
69062306a36Sopenharmony_ci			len -= ret;
69162306a36Sopenharmony_ci		} while (len);
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ci		skb = skb_dequeue(&psock->ingress_skb);
69462306a36Sopenharmony_ci		kfree_skb(skb);
69562306a36Sopenharmony_ci	}
69662306a36Sopenharmony_ciend:
69762306a36Sopenharmony_ci	mutex_unlock(&psock->work_mutex);
69862306a36Sopenharmony_ci}
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_cistruct sk_psock *sk_psock_init(struct sock *sk, int node)
70162306a36Sopenharmony_ci{
70262306a36Sopenharmony_ci	struct sk_psock *psock;
70362306a36Sopenharmony_ci	struct proto *prot;
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	write_lock_bh(&sk->sk_callback_lock);
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_ci	if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) {
70862306a36Sopenharmony_ci		psock = ERR_PTR(-EINVAL);
70962306a36Sopenharmony_ci		goto out;
71062306a36Sopenharmony_ci	}
71162306a36Sopenharmony_ci
71262306a36Sopenharmony_ci	if (sk->sk_user_data) {
71362306a36Sopenharmony_ci		psock = ERR_PTR(-EBUSY);
71462306a36Sopenharmony_ci		goto out;
71562306a36Sopenharmony_ci	}
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node);
71862306a36Sopenharmony_ci	if (!psock) {
71962306a36Sopenharmony_ci		psock = ERR_PTR(-ENOMEM);
72062306a36Sopenharmony_ci		goto out;
72162306a36Sopenharmony_ci	}
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci	prot = READ_ONCE(sk->sk_prot);
72462306a36Sopenharmony_ci	psock->sk = sk;
72562306a36Sopenharmony_ci	psock->eval = __SK_NONE;
72662306a36Sopenharmony_ci	psock->sk_proto = prot;
72762306a36Sopenharmony_ci	psock->saved_unhash = prot->unhash;
72862306a36Sopenharmony_ci	psock->saved_destroy = prot->destroy;
72962306a36Sopenharmony_ci	psock->saved_close = prot->close;
73062306a36Sopenharmony_ci	psock->saved_write_space = sk->sk_write_space;
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	INIT_LIST_HEAD(&psock->link);
73362306a36Sopenharmony_ci	spin_lock_init(&psock->link_lock);
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci	INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
73662306a36Sopenharmony_ci	mutex_init(&psock->work_mutex);
73762306a36Sopenharmony_ci	INIT_LIST_HEAD(&psock->ingress_msg);
73862306a36Sopenharmony_ci	spin_lock_init(&psock->ingress_lock);
73962306a36Sopenharmony_ci	skb_queue_head_init(&psock->ingress_skb);
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci	sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
74262306a36Sopenharmony_ci	refcount_set(&psock->refcnt, 1);
74362306a36Sopenharmony_ci
74462306a36Sopenharmony_ci	__rcu_assign_sk_user_data_with_flags(sk, psock,
74562306a36Sopenharmony_ci					     SK_USER_DATA_NOCOPY |
74662306a36Sopenharmony_ci					     SK_USER_DATA_PSOCK);
74762306a36Sopenharmony_ci	sock_hold(sk);
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_ciout:
75062306a36Sopenharmony_ci	write_unlock_bh(&sk->sk_callback_lock);
75162306a36Sopenharmony_ci	return psock;
75262306a36Sopenharmony_ci}
75362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_psock_init);
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_cistruct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
75662306a36Sopenharmony_ci{
75762306a36Sopenharmony_ci	struct sk_psock_link *link;
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci	spin_lock_bh(&psock->link_lock);
76062306a36Sopenharmony_ci	link = list_first_entry_or_null(&psock->link, struct sk_psock_link,
76162306a36Sopenharmony_ci					list);
76262306a36Sopenharmony_ci	if (link)
76362306a36Sopenharmony_ci		list_del(&link->list);
76462306a36Sopenharmony_ci	spin_unlock_bh(&psock->link_lock);
76562306a36Sopenharmony_ci	return link;
76662306a36Sopenharmony_ci}
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_cistatic void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
76962306a36Sopenharmony_ci{
77062306a36Sopenharmony_ci	struct sk_msg *msg, *tmp;
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_ci	list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
77362306a36Sopenharmony_ci		list_del(&msg->list);
77462306a36Sopenharmony_ci		sk_msg_free(psock->sk, msg);
77562306a36Sopenharmony_ci		kfree(msg);
77662306a36Sopenharmony_ci	}
77762306a36Sopenharmony_ci}
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_cistatic void __sk_psock_zap_ingress(struct sk_psock *psock)
78062306a36Sopenharmony_ci{
78162306a36Sopenharmony_ci	struct sk_buff *skb;
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci	while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) {
78462306a36Sopenharmony_ci		skb_bpf_redirect_clear(skb);
78562306a36Sopenharmony_ci		sock_drop(psock->sk, skb);
78662306a36Sopenharmony_ci	}
78762306a36Sopenharmony_ci	__sk_psock_purge_ingress_msg(psock);
78862306a36Sopenharmony_ci}
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_cistatic void sk_psock_link_destroy(struct sk_psock *psock)
79162306a36Sopenharmony_ci{
79262306a36Sopenharmony_ci	struct sk_psock_link *link, *tmp;
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	list_for_each_entry_safe(link, tmp, &psock->link, list) {
79562306a36Sopenharmony_ci		list_del(&link->list);
79662306a36Sopenharmony_ci		sk_psock_free_link(link);
79762306a36Sopenharmony_ci	}
79862306a36Sopenharmony_ci}
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_civoid sk_psock_stop(struct sk_psock *psock)
80162306a36Sopenharmony_ci{
80262306a36Sopenharmony_ci	spin_lock_bh(&psock->ingress_lock);
80362306a36Sopenharmony_ci	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
80462306a36Sopenharmony_ci	sk_psock_cork_free(psock);
80562306a36Sopenharmony_ci	spin_unlock_bh(&psock->ingress_lock);
80662306a36Sopenharmony_ci}
80762306a36Sopenharmony_ci
80862306a36Sopenharmony_cistatic void sk_psock_done_strp(struct sk_psock *psock);
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_cistatic void sk_psock_destroy(struct work_struct *work)
81162306a36Sopenharmony_ci{
81262306a36Sopenharmony_ci	struct sk_psock *psock = container_of(to_rcu_work(work),
81362306a36Sopenharmony_ci					      struct sk_psock, rwork);
81462306a36Sopenharmony_ci	/* No sk_callback_lock since already detached. */
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci	sk_psock_done_strp(psock);
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci	cancel_delayed_work_sync(&psock->work);
81962306a36Sopenharmony_ci	__sk_psock_zap_ingress(psock);
82062306a36Sopenharmony_ci	mutex_destroy(&psock->work_mutex);
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	psock_progs_drop(&psock->progs);
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	sk_psock_link_destroy(psock);
82562306a36Sopenharmony_ci	sk_psock_cork_free(psock);
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci	if (psock->sk_redir)
82862306a36Sopenharmony_ci		sock_put(psock->sk_redir);
82962306a36Sopenharmony_ci	if (psock->sk_pair)
83062306a36Sopenharmony_ci		sock_put(psock->sk_pair);
83162306a36Sopenharmony_ci	sock_put(psock->sk);
83262306a36Sopenharmony_ci	kfree(psock);
83362306a36Sopenharmony_ci}
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_civoid sk_psock_drop(struct sock *sk, struct sk_psock *psock)
83662306a36Sopenharmony_ci{
83762306a36Sopenharmony_ci	write_lock_bh(&sk->sk_callback_lock);
83862306a36Sopenharmony_ci	sk_psock_restore_proto(sk, psock);
83962306a36Sopenharmony_ci	rcu_assign_sk_user_data(sk, NULL);
84062306a36Sopenharmony_ci	if (psock->progs.stream_parser)
84162306a36Sopenharmony_ci		sk_psock_stop_strp(sk, psock);
84262306a36Sopenharmony_ci	else if (psock->progs.stream_verdict || psock->progs.skb_verdict)
84362306a36Sopenharmony_ci		sk_psock_stop_verdict(sk, psock);
84462306a36Sopenharmony_ci	write_unlock_bh(&sk->sk_callback_lock);
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_ci	sk_psock_stop(psock);
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci	INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
84962306a36Sopenharmony_ci	queue_rcu_work(system_wq, &psock->rwork);
85062306a36Sopenharmony_ci}
85162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_psock_drop);
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_cistatic int sk_psock_map_verd(int verdict, bool redir)
85462306a36Sopenharmony_ci{
85562306a36Sopenharmony_ci	switch (verdict) {
85662306a36Sopenharmony_ci	case SK_PASS:
85762306a36Sopenharmony_ci		return redir ? __SK_REDIRECT : __SK_PASS;
85862306a36Sopenharmony_ci	case SK_DROP:
85962306a36Sopenharmony_ci	default:
86062306a36Sopenharmony_ci		break;
86162306a36Sopenharmony_ci	}
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	return __SK_DROP;
86462306a36Sopenharmony_ci}
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ciint sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
86762306a36Sopenharmony_ci			 struct sk_msg *msg)
86862306a36Sopenharmony_ci{
86962306a36Sopenharmony_ci	struct bpf_prog *prog;
87062306a36Sopenharmony_ci	int ret;
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	rcu_read_lock();
87362306a36Sopenharmony_ci	prog = READ_ONCE(psock->progs.msg_parser);
87462306a36Sopenharmony_ci	if (unlikely(!prog)) {
87562306a36Sopenharmony_ci		ret = __SK_PASS;
87662306a36Sopenharmony_ci		goto out;
87762306a36Sopenharmony_ci	}
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci	sk_msg_compute_data_pointers(msg);
88062306a36Sopenharmony_ci	msg->sk = sk;
88162306a36Sopenharmony_ci	ret = bpf_prog_run_pin_on_cpu(prog, msg);
88262306a36Sopenharmony_ci	ret = sk_psock_map_verd(ret, msg->sk_redir);
88362306a36Sopenharmony_ci	psock->apply_bytes = msg->apply_bytes;
88462306a36Sopenharmony_ci	if (ret == __SK_REDIRECT) {
88562306a36Sopenharmony_ci		if (psock->sk_redir) {
88662306a36Sopenharmony_ci			sock_put(psock->sk_redir);
88762306a36Sopenharmony_ci			psock->sk_redir = NULL;
88862306a36Sopenharmony_ci		}
88962306a36Sopenharmony_ci		if (!msg->sk_redir) {
89062306a36Sopenharmony_ci			ret = __SK_DROP;
89162306a36Sopenharmony_ci			goto out;
89262306a36Sopenharmony_ci		}
89362306a36Sopenharmony_ci		psock->redir_ingress = sk_msg_to_ingress(msg);
89462306a36Sopenharmony_ci		psock->sk_redir = msg->sk_redir;
89562306a36Sopenharmony_ci		sock_hold(psock->sk_redir);
89662306a36Sopenharmony_ci	}
89762306a36Sopenharmony_ciout:
89862306a36Sopenharmony_ci	rcu_read_unlock();
89962306a36Sopenharmony_ci	return ret;
90062306a36Sopenharmony_ci}
90162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_cistatic int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
90462306a36Sopenharmony_ci{
90562306a36Sopenharmony_ci	struct sk_psock *psock_other;
90662306a36Sopenharmony_ci	struct sock *sk_other;
90762306a36Sopenharmony_ci
90862306a36Sopenharmony_ci	sk_other = skb_bpf_redirect_fetch(skb);
90962306a36Sopenharmony_ci	/* This error is a buggy BPF program, it returned a redirect
91062306a36Sopenharmony_ci	 * return code, but then didn't set a redirect interface.
91162306a36Sopenharmony_ci	 */
91262306a36Sopenharmony_ci	if (unlikely(!sk_other)) {
91362306a36Sopenharmony_ci		skb_bpf_redirect_clear(skb);
91462306a36Sopenharmony_ci		sock_drop(from->sk, skb);
91562306a36Sopenharmony_ci		return -EIO;
91662306a36Sopenharmony_ci	}
91762306a36Sopenharmony_ci	psock_other = sk_psock(sk_other);
91862306a36Sopenharmony_ci	/* This error indicates the socket is being torn down or had another
91962306a36Sopenharmony_ci	 * error that caused the pipe to break. We can't send a packet on
92062306a36Sopenharmony_ci	 * a socket that is in this state so we drop the skb.
92162306a36Sopenharmony_ci	 */
92262306a36Sopenharmony_ci	if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
92362306a36Sopenharmony_ci		skb_bpf_redirect_clear(skb);
92462306a36Sopenharmony_ci		sock_drop(from->sk, skb);
92562306a36Sopenharmony_ci		return -EIO;
92662306a36Sopenharmony_ci	}
92762306a36Sopenharmony_ci	spin_lock_bh(&psock_other->ingress_lock);
92862306a36Sopenharmony_ci	if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
92962306a36Sopenharmony_ci		spin_unlock_bh(&psock_other->ingress_lock);
93062306a36Sopenharmony_ci		skb_bpf_redirect_clear(skb);
93162306a36Sopenharmony_ci		sock_drop(from->sk, skb);
93262306a36Sopenharmony_ci		return -EIO;
93362306a36Sopenharmony_ci	}
93462306a36Sopenharmony_ci
93562306a36Sopenharmony_ci	skb_queue_tail(&psock_other->ingress_skb, skb);
93662306a36Sopenharmony_ci	schedule_delayed_work(&psock_other->work, 0);
93762306a36Sopenharmony_ci	spin_unlock_bh(&psock_other->ingress_lock);
93862306a36Sopenharmony_ci	return 0;
93962306a36Sopenharmony_ci}
94062306a36Sopenharmony_ci
94162306a36Sopenharmony_cistatic void sk_psock_tls_verdict_apply(struct sk_buff *skb,
94262306a36Sopenharmony_ci				       struct sk_psock *from, int verdict)
94362306a36Sopenharmony_ci{
94462306a36Sopenharmony_ci	switch (verdict) {
94562306a36Sopenharmony_ci	case __SK_REDIRECT:
94662306a36Sopenharmony_ci		sk_psock_skb_redirect(from, skb);
94762306a36Sopenharmony_ci		break;
94862306a36Sopenharmony_ci	case __SK_PASS:
94962306a36Sopenharmony_ci	case __SK_DROP:
95062306a36Sopenharmony_ci	default:
95162306a36Sopenharmony_ci		break;
95262306a36Sopenharmony_ci	}
95362306a36Sopenharmony_ci}
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ciint sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
95662306a36Sopenharmony_ci{
95762306a36Sopenharmony_ci	struct bpf_prog *prog;
95862306a36Sopenharmony_ci	int ret = __SK_PASS;
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci	rcu_read_lock();
96162306a36Sopenharmony_ci	prog = READ_ONCE(psock->progs.stream_verdict);
96262306a36Sopenharmony_ci	if (likely(prog)) {
96362306a36Sopenharmony_ci		skb->sk = psock->sk;
96462306a36Sopenharmony_ci		skb_dst_drop(skb);
96562306a36Sopenharmony_ci		skb_bpf_redirect_clear(skb);
96662306a36Sopenharmony_ci		ret = bpf_prog_run_pin_on_cpu(prog, skb);
96762306a36Sopenharmony_ci		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
96862306a36Sopenharmony_ci		skb->sk = NULL;
96962306a36Sopenharmony_ci	}
97062306a36Sopenharmony_ci	sk_psock_tls_verdict_apply(skb, psock, ret);
97162306a36Sopenharmony_ci	rcu_read_unlock();
97262306a36Sopenharmony_ci	return ret;
97362306a36Sopenharmony_ci}
97462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_cistatic int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
97762306a36Sopenharmony_ci				  int verdict)
97862306a36Sopenharmony_ci{
97962306a36Sopenharmony_ci	struct sock *sk_other;
98062306a36Sopenharmony_ci	int err = 0;
98162306a36Sopenharmony_ci	u32 len, off;
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci	switch (verdict) {
98462306a36Sopenharmony_ci	case __SK_PASS:
98562306a36Sopenharmony_ci		err = -EIO;
98662306a36Sopenharmony_ci		sk_other = psock->sk;
98762306a36Sopenharmony_ci		if (sock_flag(sk_other, SOCK_DEAD) ||
98862306a36Sopenharmony_ci		    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
98962306a36Sopenharmony_ci			goto out_free;
99062306a36Sopenharmony_ci
99162306a36Sopenharmony_ci		skb_bpf_set_ingress(skb);
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_ci		/* If the queue is empty then we can submit directly
99462306a36Sopenharmony_ci		 * into the msg queue. If its not empty we have to
99562306a36Sopenharmony_ci		 * queue work otherwise we may get OOO data. Otherwise,
99662306a36Sopenharmony_ci		 * if sk_psock_skb_ingress errors will be handled by
99762306a36Sopenharmony_ci		 * retrying later from workqueue.
99862306a36Sopenharmony_ci		 */
99962306a36Sopenharmony_ci		if (skb_queue_empty(&psock->ingress_skb)) {
100062306a36Sopenharmony_ci			len = skb->len;
100162306a36Sopenharmony_ci			off = 0;
100262306a36Sopenharmony_ci			if (skb_bpf_strparser(skb)) {
100362306a36Sopenharmony_ci				struct strp_msg *stm = strp_msg(skb);
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ci				off = stm->offset;
100662306a36Sopenharmony_ci				len = stm->full_len;
100762306a36Sopenharmony_ci			}
100862306a36Sopenharmony_ci			err = sk_psock_skb_ingress_self(psock, skb, off, len);
100962306a36Sopenharmony_ci		}
101062306a36Sopenharmony_ci		if (err < 0) {
101162306a36Sopenharmony_ci			spin_lock_bh(&psock->ingress_lock);
101262306a36Sopenharmony_ci			if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
101362306a36Sopenharmony_ci				skb_queue_tail(&psock->ingress_skb, skb);
101462306a36Sopenharmony_ci				schedule_delayed_work(&psock->work, 0);
101562306a36Sopenharmony_ci				err = 0;
101662306a36Sopenharmony_ci			}
101762306a36Sopenharmony_ci			spin_unlock_bh(&psock->ingress_lock);
101862306a36Sopenharmony_ci			if (err < 0)
101962306a36Sopenharmony_ci				goto out_free;
102062306a36Sopenharmony_ci		}
102162306a36Sopenharmony_ci		break;
102262306a36Sopenharmony_ci	case __SK_REDIRECT:
102362306a36Sopenharmony_ci		tcp_eat_skb(psock->sk, skb);
102462306a36Sopenharmony_ci		err = sk_psock_skb_redirect(psock, skb);
102562306a36Sopenharmony_ci		break;
102662306a36Sopenharmony_ci	case __SK_DROP:
102762306a36Sopenharmony_ci	default:
102862306a36Sopenharmony_ciout_free:
102962306a36Sopenharmony_ci		skb_bpf_redirect_clear(skb);
103062306a36Sopenharmony_ci		tcp_eat_skb(psock->sk, skb);
103162306a36Sopenharmony_ci		sock_drop(psock->sk, skb);
103262306a36Sopenharmony_ci	}
103362306a36Sopenharmony_ci
103462306a36Sopenharmony_ci	return err;
103562306a36Sopenharmony_ci}
103662306a36Sopenharmony_ci
103762306a36Sopenharmony_cistatic void sk_psock_write_space(struct sock *sk)
103862306a36Sopenharmony_ci{
103962306a36Sopenharmony_ci	struct sk_psock *psock;
104062306a36Sopenharmony_ci	void (*write_space)(struct sock *sk) = NULL;
104162306a36Sopenharmony_ci
104262306a36Sopenharmony_ci	rcu_read_lock();
104362306a36Sopenharmony_ci	psock = sk_psock(sk);
104462306a36Sopenharmony_ci	if (likely(psock)) {
104562306a36Sopenharmony_ci		if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
104662306a36Sopenharmony_ci			schedule_delayed_work(&psock->work, 0);
104762306a36Sopenharmony_ci		write_space = psock->saved_write_space;
104862306a36Sopenharmony_ci	}
104962306a36Sopenharmony_ci	rcu_read_unlock();
105062306a36Sopenharmony_ci	if (write_space)
105162306a36Sopenharmony_ci		write_space(sk);
105262306a36Sopenharmony_ci}
105362306a36Sopenharmony_ci
105462306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
105562306a36Sopenharmony_cistatic void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
105662306a36Sopenharmony_ci{
105762306a36Sopenharmony_ci	struct sk_psock *psock;
105862306a36Sopenharmony_ci	struct bpf_prog *prog;
105962306a36Sopenharmony_ci	int ret = __SK_DROP;
106062306a36Sopenharmony_ci	struct sock *sk;
106162306a36Sopenharmony_ci
106262306a36Sopenharmony_ci	rcu_read_lock();
106362306a36Sopenharmony_ci	sk = strp->sk;
106462306a36Sopenharmony_ci	psock = sk_psock(sk);
106562306a36Sopenharmony_ci	if (unlikely(!psock)) {
106662306a36Sopenharmony_ci		sock_drop(sk, skb);
106762306a36Sopenharmony_ci		goto out;
106862306a36Sopenharmony_ci	}
106962306a36Sopenharmony_ci	prog = READ_ONCE(psock->progs.stream_verdict);
107062306a36Sopenharmony_ci	if (likely(prog)) {
107162306a36Sopenharmony_ci		skb->sk = sk;
107262306a36Sopenharmony_ci		skb_dst_drop(skb);
107362306a36Sopenharmony_ci		skb_bpf_redirect_clear(skb);
107462306a36Sopenharmony_ci		ret = bpf_prog_run_pin_on_cpu(prog, skb);
107562306a36Sopenharmony_ci		skb_bpf_set_strparser(skb);
107662306a36Sopenharmony_ci		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
107762306a36Sopenharmony_ci		skb->sk = NULL;
107862306a36Sopenharmony_ci	}
107962306a36Sopenharmony_ci	sk_psock_verdict_apply(psock, skb, ret);
108062306a36Sopenharmony_ciout:
108162306a36Sopenharmony_ci	rcu_read_unlock();
108262306a36Sopenharmony_ci}
108362306a36Sopenharmony_ci
/* strparser read_sock_done callback: hands @err back unchanged. @strp is
 * not used.
 */
static int sk_psock_strp_read_done(struct strparser *strp, int err)
{
	(void)strp;

	return err;
}
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_cistatic int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
109062306a36Sopenharmony_ci{
109162306a36Sopenharmony_ci	struct sk_psock *psock = container_of(strp, struct sk_psock, strp);
109262306a36Sopenharmony_ci	struct bpf_prog *prog;
109362306a36Sopenharmony_ci	int ret = skb->len;
109462306a36Sopenharmony_ci
109562306a36Sopenharmony_ci	rcu_read_lock();
109662306a36Sopenharmony_ci	prog = READ_ONCE(psock->progs.stream_parser);
109762306a36Sopenharmony_ci	if (likely(prog)) {
109862306a36Sopenharmony_ci		skb->sk = psock->sk;
109962306a36Sopenharmony_ci		ret = bpf_prog_run_pin_on_cpu(prog, skb);
110062306a36Sopenharmony_ci		skb->sk = NULL;
110162306a36Sopenharmony_ci	}
110262306a36Sopenharmony_ci	rcu_read_unlock();
110362306a36Sopenharmony_ci	return ret;
110462306a36Sopenharmony_ci}
110562306a36Sopenharmony_ci
110662306a36Sopenharmony_ci/* Called with socket lock held. */
110762306a36Sopenharmony_cistatic void sk_psock_strp_data_ready(struct sock *sk)
110862306a36Sopenharmony_ci{
110962306a36Sopenharmony_ci	struct sk_psock *psock;
111062306a36Sopenharmony_ci
111162306a36Sopenharmony_ci	trace_sk_data_ready(sk);
111262306a36Sopenharmony_ci
111362306a36Sopenharmony_ci	rcu_read_lock();
111462306a36Sopenharmony_ci	psock = sk_psock(sk);
111562306a36Sopenharmony_ci	if (likely(psock)) {
111662306a36Sopenharmony_ci		if (tls_sw_has_ctx_rx(sk)) {
111762306a36Sopenharmony_ci			psock->saved_data_ready(sk);
111862306a36Sopenharmony_ci		} else {
111962306a36Sopenharmony_ci			write_lock_bh(&sk->sk_callback_lock);
112062306a36Sopenharmony_ci			strp_data_ready(&psock->strp);
112162306a36Sopenharmony_ci			write_unlock_bh(&sk->sk_callback_lock);
112262306a36Sopenharmony_ci		}
112362306a36Sopenharmony_ci	}
112462306a36Sopenharmony_ci	rcu_read_unlock();
112562306a36Sopenharmony_ci}
112662306a36Sopenharmony_ci
112762306a36Sopenharmony_ciint sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
112862306a36Sopenharmony_ci{
112962306a36Sopenharmony_ci	int ret;
113062306a36Sopenharmony_ci
113162306a36Sopenharmony_ci	static const struct strp_callbacks cb = {
113262306a36Sopenharmony_ci		.rcv_msg	= sk_psock_strp_read,
113362306a36Sopenharmony_ci		.read_sock_done	= sk_psock_strp_read_done,
113462306a36Sopenharmony_ci		.parse_msg	= sk_psock_strp_parse,
113562306a36Sopenharmony_ci	};
113662306a36Sopenharmony_ci
113762306a36Sopenharmony_ci	ret = strp_init(&psock->strp, sk, &cb);
113862306a36Sopenharmony_ci	if (!ret)
113962306a36Sopenharmony_ci		sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
114062306a36Sopenharmony_ci
114162306a36Sopenharmony_ci	return ret;
114262306a36Sopenharmony_ci}
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_civoid sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
114562306a36Sopenharmony_ci{
114662306a36Sopenharmony_ci	if (psock->saved_data_ready)
114762306a36Sopenharmony_ci		return;
114862306a36Sopenharmony_ci
114962306a36Sopenharmony_ci	psock->saved_data_ready = sk->sk_data_ready;
115062306a36Sopenharmony_ci	sk->sk_data_ready = sk_psock_strp_data_ready;
115162306a36Sopenharmony_ci	sk->sk_write_space = sk_psock_write_space;
115262306a36Sopenharmony_ci}
115362306a36Sopenharmony_ci
115462306a36Sopenharmony_civoid sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
115562306a36Sopenharmony_ci{
115662306a36Sopenharmony_ci	psock_set_prog(&psock->progs.stream_parser, NULL);
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ci	if (!psock->saved_data_ready)
115962306a36Sopenharmony_ci		return;
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_ci	sk->sk_data_ready = psock->saved_data_ready;
116262306a36Sopenharmony_ci	psock->saved_data_ready = NULL;
116362306a36Sopenharmony_ci	strp_stop(&psock->strp);
116462306a36Sopenharmony_ci}
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_cistatic void sk_psock_done_strp(struct sk_psock *psock)
116762306a36Sopenharmony_ci{
116862306a36Sopenharmony_ci	/* Parser has been stopped */
116962306a36Sopenharmony_ci	if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
117062306a36Sopenharmony_ci		strp_done(&psock->strp);
117162306a36Sopenharmony_ci}
117262306a36Sopenharmony_ci#else
/* Stub used when CONFIG_BPF_STREAM_PARSER is not enabled: nothing to do. */
static void sk_psock_done_strp(struct sk_psock *psock)
{
}
117662306a36Sopenharmony_ci#endif /* CONFIG_BPF_STREAM_PARSER */
117762306a36Sopenharmony_ci
117862306a36Sopenharmony_cistatic int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
117962306a36Sopenharmony_ci{
118062306a36Sopenharmony_ci	struct sk_psock *psock;
118162306a36Sopenharmony_ci	struct bpf_prog *prog;
118262306a36Sopenharmony_ci	int ret = __SK_DROP;
118362306a36Sopenharmony_ci	int len = skb->len;
118462306a36Sopenharmony_ci
118562306a36Sopenharmony_ci	rcu_read_lock();
118662306a36Sopenharmony_ci	psock = sk_psock(sk);
118762306a36Sopenharmony_ci	if (unlikely(!psock)) {
118862306a36Sopenharmony_ci		len = 0;
118962306a36Sopenharmony_ci		tcp_eat_skb(sk, skb);
119062306a36Sopenharmony_ci		sock_drop(sk, skb);
119162306a36Sopenharmony_ci		goto out;
119262306a36Sopenharmony_ci	}
119362306a36Sopenharmony_ci	prog = READ_ONCE(psock->progs.stream_verdict);
119462306a36Sopenharmony_ci	if (!prog)
119562306a36Sopenharmony_ci		prog = READ_ONCE(psock->progs.skb_verdict);
119662306a36Sopenharmony_ci	if (likely(prog)) {
119762306a36Sopenharmony_ci		skb_dst_drop(skb);
119862306a36Sopenharmony_ci		skb_bpf_redirect_clear(skb);
119962306a36Sopenharmony_ci		ret = bpf_prog_run_pin_on_cpu(prog, skb);
120062306a36Sopenharmony_ci		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
120162306a36Sopenharmony_ci	}
120262306a36Sopenharmony_ci	ret = sk_psock_verdict_apply(psock, skb, ret);
120362306a36Sopenharmony_ci	if (ret < 0)
120462306a36Sopenharmony_ci		len = ret;
120562306a36Sopenharmony_ciout:
120662306a36Sopenharmony_ci	rcu_read_unlock();
120762306a36Sopenharmony_ci	return len;
120862306a36Sopenharmony_ci}
120962306a36Sopenharmony_ci
121062306a36Sopenharmony_cistatic void sk_psock_verdict_data_ready(struct sock *sk)
121162306a36Sopenharmony_ci{
121262306a36Sopenharmony_ci	struct socket *sock = sk->sk_socket;
121362306a36Sopenharmony_ci	const struct proto_ops *ops;
121462306a36Sopenharmony_ci	int copied;
121562306a36Sopenharmony_ci
121662306a36Sopenharmony_ci	trace_sk_data_ready(sk);
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	if (unlikely(!sock))
121962306a36Sopenharmony_ci		return;
122062306a36Sopenharmony_ci	ops = READ_ONCE(sock->ops);
122162306a36Sopenharmony_ci	if (!ops || !ops->read_skb)
122262306a36Sopenharmony_ci		return;
122362306a36Sopenharmony_ci	copied = ops->read_skb(sk, sk_psock_verdict_recv);
122462306a36Sopenharmony_ci	if (copied >= 0) {
122562306a36Sopenharmony_ci		struct sk_psock *psock;
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_ci		rcu_read_lock();
122862306a36Sopenharmony_ci		psock = sk_psock(sk);
122962306a36Sopenharmony_ci		if (psock) {
123062306a36Sopenharmony_ci			read_lock_bh(&sk->sk_callback_lock);
123162306a36Sopenharmony_ci			sk_psock_data_ready(sk, psock);
123262306a36Sopenharmony_ci			read_unlock_bh(&sk->sk_callback_lock);
123362306a36Sopenharmony_ci		}
123462306a36Sopenharmony_ci		rcu_read_unlock();
123562306a36Sopenharmony_ci	}
123662306a36Sopenharmony_ci}
123762306a36Sopenharmony_ci
123862306a36Sopenharmony_civoid sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
123962306a36Sopenharmony_ci{
124062306a36Sopenharmony_ci	if (psock->saved_data_ready)
124162306a36Sopenharmony_ci		return;
124262306a36Sopenharmony_ci
124362306a36Sopenharmony_ci	psock->saved_data_ready = sk->sk_data_ready;
124462306a36Sopenharmony_ci	sk->sk_data_ready = sk_psock_verdict_data_ready;
124562306a36Sopenharmony_ci	sk->sk_write_space = sk_psock_write_space;
124662306a36Sopenharmony_ci}
124762306a36Sopenharmony_ci
124862306a36Sopenharmony_civoid sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
124962306a36Sopenharmony_ci{
125062306a36Sopenharmony_ci	psock_set_prog(&psock->progs.stream_verdict, NULL);
125162306a36Sopenharmony_ci	psock_set_prog(&psock->progs.skb_verdict, NULL);
125262306a36Sopenharmony_ci
125362306a36Sopenharmony_ci	if (!psock->saved_data_ready)
125462306a36Sopenharmony_ci		return;
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci	sk->sk_data_ready = psock->saved_data_ready;
125762306a36Sopenharmony_ci	psock->saved_data_ready = NULL;
125862306a36Sopenharmony_ci}
1259