162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */ 362306a36Sopenharmony_ci 462306a36Sopenharmony_ci#include <linux/skmsg.h> 562306a36Sopenharmony_ci#include <linux/skbuff.h> 662306a36Sopenharmony_ci#include <linux/scatterlist.h> 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <net/sock.h> 962306a36Sopenharmony_ci#include <net/tcp.h> 1062306a36Sopenharmony_ci#include <net/tls.h> 1162306a36Sopenharmony_ci#include <trace/events/sock.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_cistatic bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce) 1462306a36Sopenharmony_ci{ 1562306a36Sopenharmony_ci if (msg->sg.end > msg->sg.start && 1662306a36Sopenharmony_ci elem_first_coalesce < msg->sg.end) 1762306a36Sopenharmony_ci return true; 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci if (msg->sg.end < msg->sg.start && 2062306a36Sopenharmony_ci (elem_first_coalesce > msg->sg.start || 2162306a36Sopenharmony_ci elem_first_coalesce < msg->sg.end)) 2262306a36Sopenharmony_ci return true; 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci return false; 2562306a36Sopenharmony_ci} 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ciint sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, 2862306a36Sopenharmony_ci int elem_first_coalesce) 2962306a36Sopenharmony_ci{ 3062306a36Sopenharmony_ci struct page_frag *pfrag = sk_page_frag(sk); 3162306a36Sopenharmony_ci u32 osize = msg->sg.size; 3262306a36Sopenharmony_ci int ret = 0; 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci len -= msg->sg.size; 3562306a36Sopenharmony_ci while (len > 0) { 3662306a36Sopenharmony_ci struct scatterlist *sge; 3762306a36Sopenharmony_ci u32 orig_offset; 3862306a36Sopenharmony_ci int use, i; 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci if (!sk_page_frag_refill(sk, pfrag)) { 4162306a36Sopenharmony_ci ret = -ENOMEM; 4262306a36Sopenharmony_ci goto msg_trim; 4362306a36Sopenharmony_ci } 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci orig_offset = pfrag->offset; 4662306a36Sopenharmony_ci use = min_t(int, len, pfrag->size - orig_offset); 4762306a36Sopenharmony_ci if (!sk_wmem_schedule(sk, use)) { 4862306a36Sopenharmony_ci ret = -ENOMEM; 4962306a36Sopenharmony_ci goto msg_trim; 5062306a36Sopenharmony_ci } 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci i = msg->sg.end; 5362306a36Sopenharmony_ci sk_msg_iter_var_prev(i); 5462306a36Sopenharmony_ci sge = &msg->sg.data[i]; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci if (sk_msg_try_coalesce_ok(msg, elem_first_coalesce) && 5762306a36Sopenharmony_ci sg_page(sge) == pfrag->page && 5862306a36Sopenharmony_ci sge->offset + sge->length == orig_offset) { 5962306a36Sopenharmony_ci sge->length += use; 6062306a36Sopenharmony_ci } else { 6162306a36Sopenharmony_ci if (sk_msg_full(msg)) { 6262306a36Sopenharmony_ci ret = -ENOSPC; 6362306a36Sopenharmony_ci break; 6462306a36Sopenharmony_ci } 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci sge = &msg->sg.data[msg->sg.end]; 6762306a36Sopenharmony_ci sg_unmark_end(sge); 6862306a36Sopenharmony_ci sg_set_page(sge, pfrag->page, use, orig_offset); 6962306a36Sopenharmony_ci get_page(pfrag->page); 7062306a36Sopenharmony_ci sk_msg_iter_next(msg, end); 7162306a36Sopenharmony_ci } 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci sk_mem_charge(sk, use); 7462306a36Sopenharmony_ci msg->sg.size += use; 7562306a36Sopenharmony_ci pfrag->offset += use; 7662306a36Sopenharmony_ci len -= use; 7762306a36Sopenharmony_ci } 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci return ret; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_cimsg_trim: 8262306a36Sopenharmony_ci sk_msg_trim(sk, msg, osize); 8362306a36Sopenharmony_ci return ret; 8462306a36Sopenharmony_ci} 8562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_alloc); 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ciint sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, 8862306a36Sopenharmony_ci u32 off, u32 len) 8962306a36Sopenharmony_ci{ 9062306a36Sopenharmony_ci int i = src->sg.start; 9162306a36Sopenharmony_ci struct scatterlist *sge = sk_msg_elem(src, i); 9262306a36Sopenharmony_ci struct scatterlist *sgd = NULL; 9362306a36Sopenharmony_ci u32 sge_len, sge_off; 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci while (off) { 9662306a36Sopenharmony_ci if (sge->length > off) 9762306a36Sopenharmony_ci break; 9862306a36Sopenharmony_ci off -= sge->length; 9962306a36Sopenharmony_ci sk_msg_iter_var_next(i); 10062306a36Sopenharmony_ci if (i == src->sg.end && off) 10162306a36Sopenharmony_ci return -ENOSPC; 10262306a36Sopenharmony_ci sge = sk_msg_elem(src, i); 10362306a36Sopenharmony_ci } 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci while (len) { 10662306a36Sopenharmony_ci sge_len = sge->length - off; 10762306a36Sopenharmony_ci if (sge_len > len) 10862306a36Sopenharmony_ci sge_len = len; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci if (dst->sg.end) 11162306a36Sopenharmony_ci sgd = sk_msg_elem(dst, dst->sg.end - 1); 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci if (sgd && 11462306a36Sopenharmony_ci (sg_page(sge) == sg_page(sgd)) && 11562306a36Sopenharmony_ci (sg_virt(sge) + off == sg_virt(sgd) + sgd->length)) { 11662306a36Sopenharmony_ci sgd->length += sge_len; 11762306a36Sopenharmony_ci dst->sg.size += sge_len; 11862306a36Sopenharmony_ci } else if (!sk_msg_full(dst)) { 11962306a36Sopenharmony_ci sge_off = sge->offset + off; 12062306a36Sopenharmony_ci sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off); 12162306a36Sopenharmony_ci } else { 12262306a36Sopenharmony_ci return -ENOSPC; 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci off = 0; 12662306a36Sopenharmony_ci len -= sge_len; 12762306a36Sopenharmony_ci sk_mem_charge(sk, sge_len); 12862306a36Sopenharmony_ci sk_msg_iter_var_next(i); 12962306a36Sopenharmony_ci if (i == src->sg.end && len) 13062306a36Sopenharmony_ci return -ENOSPC; 13162306a36Sopenharmony_ci sge = sk_msg_elem(src, i); 13262306a36Sopenharmony_ci } 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci return 0; 13562306a36Sopenharmony_ci} 13662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_clone); 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_civoid sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes) 13962306a36Sopenharmony_ci{ 14062306a36Sopenharmony_ci int i = msg->sg.start; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci do { 14362306a36Sopenharmony_ci struct scatterlist *sge = sk_msg_elem(msg, i); 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci if (bytes < sge->length) { 14662306a36Sopenharmony_ci sge->length -= bytes; 14762306a36Sopenharmony_ci sge->offset += bytes; 14862306a36Sopenharmony_ci sk_mem_uncharge(sk, bytes); 14962306a36Sopenharmony_ci break; 15062306a36Sopenharmony_ci } 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci sk_mem_uncharge(sk, sge->length); 15362306a36Sopenharmony_ci bytes -= sge->length; 15462306a36Sopenharmony_ci sge->length = 0; 15562306a36Sopenharmony_ci sge->offset = 0; 15662306a36Sopenharmony_ci sk_msg_iter_var_next(i); 15762306a36Sopenharmony_ci } while (bytes && i != msg->sg.end); 15862306a36Sopenharmony_ci msg->sg.start = i; 15962306a36Sopenharmony_ci} 16062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_return_zero); 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_civoid sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes) 16362306a36Sopenharmony_ci{ 16462306a36Sopenharmony_ci int i = msg->sg.start; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci do { 16762306a36Sopenharmony_ci struct scatterlist *sge = &msg->sg.data[i]; 16862306a36Sopenharmony_ci int uncharge = (bytes < sge->length) ? bytes : sge->length; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci sk_mem_uncharge(sk, uncharge); 17162306a36Sopenharmony_ci bytes -= uncharge; 17262306a36Sopenharmony_ci sk_msg_iter_var_next(i); 17362306a36Sopenharmony_ci } while (i != msg->sg.end); 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_return); 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_cistatic int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i, 17862306a36Sopenharmony_ci bool charge) 17962306a36Sopenharmony_ci{ 18062306a36Sopenharmony_ci struct scatterlist *sge = sk_msg_elem(msg, i); 18162306a36Sopenharmony_ci u32 len = sge->length; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci /* When the skb owns the memory we free it from consume_skb path. */ 18462306a36Sopenharmony_ci if (!msg->skb) { 18562306a36Sopenharmony_ci if (charge) 18662306a36Sopenharmony_ci sk_mem_uncharge(sk, len); 18762306a36Sopenharmony_ci put_page(sg_page(sge)); 18862306a36Sopenharmony_ci } 18962306a36Sopenharmony_ci memset(sge, 0, sizeof(*sge)); 19062306a36Sopenharmony_ci return len; 19162306a36Sopenharmony_ci} 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_cistatic int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i, 19462306a36Sopenharmony_ci bool charge) 19562306a36Sopenharmony_ci{ 19662306a36Sopenharmony_ci struct scatterlist *sge = sk_msg_elem(msg, i); 19762306a36Sopenharmony_ci int freed = 0; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci while (msg->sg.size) { 20062306a36Sopenharmony_ci msg->sg.size -= sge->length; 20162306a36Sopenharmony_ci freed += sk_msg_free_elem(sk, msg, i, charge); 20262306a36Sopenharmony_ci sk_msg_iter_var_next(i); 20362306a36Sopenharmony_ci sk_msg_check_to_free(msg, i, msg->sg.size); 20462306a36Sopenharmony_ci sge = sk_msg_elem(msg, i); 20562306a36Sopenharmony_ci } 20662306a36Sopenharmony_ci consume_skb(msg->skb); 20762306a36Sopenharmony_ci sk_msg_init(msg); 20862306a36Sopenharmony_ci return freed; 20962306a36Sopenharmony_ci} 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ciint sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg) 21262306a36Sopenharmony_ci{ 21362306a36Sopenharmony_ci return __sk_msg_free(sk, msg, msg->sg.start, false); 21462306a36Sopenharmony_ci} 21562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_free_nocharge); 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ciint sk_msg_free(struct sock *sk, struct sk_msg *msg) 21862306a36Sopenharmony_ci{ 21962306a36Sopenharmony_ci return __sk_msg_free(sk, msg, msg->sg.start, true); 22062306a36Sopenharmony_ci} 22162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_free); 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_cistatic void __sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, 22462306a36Sopenharmony_ci u32 bytes, bool charge) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci struct scatterlist *sge; 22762306a36Sopenharmony_ci u32 i = msg->sg.start; 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ci while (bytes) { 23062306a36Sopenharmony_ci sge = sk_msg_elem(msg, i); 23162306a36Sopenharmony_ci if (!sge->length) 23262306a36Sopenharmony_ci break; 23362306a36Sopenharmony_ci if (bytes < sge->length) { 23462306a36Sopenharmony_ci if (charge) 23562306a36Sopenharmony_ci sk_mem_uncharge(sk, bytes); 23662306a36Sopenharmony_ci sge->length -= bytes; 23762306a36Sopenharmony_ci sge->offset += bytes; 23862306a36Sopenharmony_ci msg->sg.size -= bytes; 23962306a36Sopenharmony_ci break; 24062306a36Sopenharmony_ci } 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci msg->sg.size -= sge->length; 24362306a36Sopenharmony_ci bytes -= sge->length; 24462306a36Sopenharmony_ci sk_msg_free_elem(sk, msg, i, charge); 24562306a36Sopenharmony_ci sk_msg_iter_var_next(i); 24662306a36Sopenharmony_ci sk_msg_check_to_free(msg, i, bytes); 24762306a36Sopenharmony_ci } 24862306a36Sopenharmony_ci msg->sg.start = i; 24962306a36Sopenharmony_ci} 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_civoid sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes) 25262306a36Sopenharmony_ci{ 25362306a36Sopenharmony_ci __sk_msg_free_partial(sk, msg, bytes, true); 25462306a36Sopenharmony_ci} 25562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_free_partial); 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_civoid sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg, 25862306a36Sopenharmony_ci u32 bytes) 25962306a36Sopenharmony_ci{ 26062306a36Sopenharmony_ci __sk_msg_free_partial(sk, msg, bytes, false); 26162306a36Sopenharmony_ci} 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_civoid sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len) 26462306a36Sopenharmony_ci{ 26562306a36Sopenharmony_ci int trim = msg->sg.size - len; 26662306a36Sopenharmony_ci u32 i = msg->sg.end; 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci if (trim <= 0) { 26962306a36Sopenharmony_ci WARN_ON(trim < 0); 27062306a36Sopenharmony_ci return; 27162306a36Sopenharmony_ci } 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci sk_msg_iter_var_prev(i); 27462306a36Sopenharmony_ci msg->sg.size = len; 27562306a36Sopenharmony_ci while (msg->sg.data[i].length && 27662306a36Sopenharmony_ci trim >= msg->sg.data[i].length) { 27762306a36Sopenharmony_ci trim -= msg->sg.data[i].length; 27862306a36Sopenharmony_ci sk_msg_free_elem(sk, msg, i, true); 27962306a36Sopenharmony_ci sk_msg_iter_var_prev(i); 28062306a36Sopenharmony_ci if (!trim) 28162306a36Sopenharmony_ci goto out; 28262306a36Sopenharmony_ci } 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci msg->sg.data[i].length -= trim; 28562306a36Sopenharmony_ci sk_mem_uncharge(sk, trim); 28662306a36Sopenharmony_ci /* Adjust copybreak if it falls into the trimmed part of last buf */ 28762306a36Sopenharmony_ci if (msg->sg.curr == i && msg->sg.copybreak > msg->sg.data[i].length) 28862306a36Sopenharmony_ci msg->sg.copybreak = msg->sg.data[i].length; 28962306a36Sopenharmony_ciout: 29062306a36Sopenharmony_ci sk_msg_iter_var_next(i); 29162306a36Sopenharmony_ci msg->sg.end = i; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci /* If we trim data a full sg elem before curr pointer update 29462306a36Sopenharmony_ci * copybreak and current so that any future copy operations 29562306a36Sopenharmony_ci * start at new copy location. 29662306a36Sopenharmony_ci * However trimed data that has not yet been used in a copy op 29762306a36Sopenharmony_ci * does not require an update. 29862306a36Sopenharmony_ci */ 29962306a36Sopenharmony_ci if (!msg->sg.size) { 30062306a36Sopenharmony_ci msg->sg.curr = msg->sg.start; 30162306a36Sopenharmony_ci msg->sg.copybreak = 0; 30262306a36Sopenharmony_ci } else if (sk_msg_iter_dist(msg->sg.start, msg->sg.curr) >= 30362306a36Sopenharmony_ci sk_msg_iter_dist(msg->sg.start, msg->sg.end)) { 30462306a36Sopenharmony_ci sk_msg_iter_var_prev(i); 30562306a36Sopenharmony_ci msg->sg.curr = i; 30662306a36Sopenharmony_ci msg->sg.copybreak = msg->sg.data[i].length; 30762306a36Sopenharmony_ci } 30862306a36Sopenharmony_ci} 30962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_trim); 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ciint sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, 31262306a36Sopenharmony_ci struct sk_msg *msg, u32 bytes) 31362306a36Sopenharmony_ci{ 31462306a36Sopenharmony_ci int i, maxpages, ret = 0, num_elems = sk_msg_elem_used(msg); 31562306a36Sopenharmony_ci const int to_max_pages = MAX_MSG_FRAGS; 31662306a36Sopenharmony_ci struct page *pages[MAX_MSG_FRAGS]; 31762306a36Sopenharmony_ci ssize_t orig, copied, use, offset; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci orig = msg->sg.size; 32062306a36Sopenharmony_ci while (bytes > 0) { 32162306a36Sopenharmony_ci i = 0; 32262306a36Sopenharmony_ci maxpages = to_max_pages - num_elems; 32362306a36Sopenharmony_ci if (maxpages == 0) { 32462306a36Sopenharmony_ci ret = -EFAULT; 32562306a36Sopenharmony_ci goto out; 32662306a36Sopenharmony_ci } 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci copied = iov_iter_get_pages2(from, pages, bytes, maxpages, 32962306a36Sopenharmony_ci &offset); 33062306a36Sopenharmony_ci if (copied <= 0) { 33162306a36Sopenharmony_ci ret = -EFAULT; 33262306a36Sopenharmony_ci goto out; 33362306a36Sopenharmony_ci } 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci bytes -= copied; 33662306a36Sopenharmony_ci msg->sg.size += copied; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci while (copied) { 33962306a36Sopenharmony_ci use = min_t(int, copied, PAGE_SIZE - offset); 34062306a36Sopenharmony_ci sg_set_page(&msg->sg.data[msg->sg.end], 34162306a36Sopenharmony_ci pages[i], use, offset); 34262306a36Sopenharmony_ci sg_unmark_end(&msg->sg.data[msg->sg.end]); 34362306a36Sopenharmony_ci sk_mem_charge(sk, use); 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci offset = 0; 34662306a36Sopenharmony_ci copied -= use; 34762306a36Sopenharmony_ci sk_msg_iter_next(msg, end); 34862306a36Sopenharmony_ci num_elems++; 34962306a36Sopenharmony_ci i++; 35062306a36Sopenharmony_ci } 35162306a36Sopenharmony_ci /* When zerocopy is mixed with sk_msg_*copy* operations we 35262306a36Sopenharmony_ci * may have a copybreak set in this case clear and prefer 35362306a36Sopenharmony_ci * zerocopy remainder when possible. 35462306a36Sopenharmony_ci */ 35562306a36Sopenharmony_ci msg->sg.copybreak = 0; 35662306a36Sopenharmony_ci msg->sg.curr = msg->sg.end; 35762306a36Sopenharmony_ci } 35862306a36Sopenharmony_ciout: 35962306a36Sopenharmony_ci /* Revert iov_iter updates, msg will need to use 'trim' later if it 36062306a36Sopenharmony_ci * also needs to be cleared. 36162306a36Sopenharmony_ci */ 36262306a36Sopenharmony_ci if (ret) 36362306a36Sopenharmony_ci iov_iter_revert(from, msg->sg.size - orig); 36462306a36Sopenharmony_ci return ret; 36562306a36Sopenharmony_ci} 36662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_zerocopy_from_iter); 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ciint sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, 36962306a36Sopenharmony_ci struct sk_msg *msg, u32 bytes) 37062306a36Sopenharmony_ci{ 37162306a36Sopenharmony_ci int ret = -ENOSPC, i = msg->sg.curr; 37262306a36Sopenharmony_ci struct scatterlist *sge; 37362306a36Sopenharmony_ci u32 copy, buf_size; 37462306a36Sopenharmony_ci void *to; 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci do { 37762306a36Sopenharmony_ci sge = sk_msg_elem(msg, i); 37862306a36Sopenharmony_ci /* This is possible if a trim operation shrunk the buffer */ 37962306a36Sopenharmony_ci if (msg->sg.copybreak >= sge->length) { 38062306a36Sopenharmony_ci msg->sg.copybreak = 0; 38162306a36Sopenharmony_ci sk_msg_iter_var_next(i); 38262306a36Sopenharmony_ci if (i == msg->sg.end) 38362306a36Sopenharmony_ci break; 38462306a36Sopenharmony_ci sge = sk_msg_elem(msg, i); 38562306a36Sopenharmony_ci } 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci buf_size = sge->length - msg->sg.copybreak; 38862306a36Sopenharmony_ci copy = (buf_size > bytes) ? bytes : buf_size; 38962306a36Sopenharmony_ci to = sg_virt(sge) + msg->sg.copybreak; 39062306a36Sopenharmony_ci msg->sg.copybreak += copy; 39162306a36Sopenharmony_ci if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) 39262306a36Sopenharmony_ci ret = copy_from_iter_nocache(to, copy, from); 39362306a36Sopenharmony_ci else 39462306a36Sopenharmony_ci ret = copy_from_iter(to, copy, from); 39562306a36Sopenharmony_ci if (ret != copy) { 39662306a36Sopenharmony_ci ret = -EFAULT; 39762306a36Sopenharmony_ci goto out; 39862306a36Sopenharmony_ci } 39962306a36Sopenharmony_ci bytes -= copy; 40062306a36Sopenharmony_ci if (!bytes) 40162306a36Sopenharmony_ci break; 40262306a36Sopenharmony_ci msg->sg.copybreak = 0; 40362306a36Sopenharmony_ci sk_msg_iter_var_next(i); 40462306a36Sopenharmony_ci } while (i != msg->sg.end); 40562306a36Sopenharmony_ciout: 40662306a36Sopenharmony_ci msg->sg.curr = i; 40762306a36Sopenharmony_ci return ret; 40862306a36Sopenharmony_ci} 40962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter); 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci/* Receive sk_msg from psock->ingress_msg to @msg. */ 41262306a36Sopenharmony_ciint sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, 41362306a36Sopenharmony_ci int len, int flags) 41462306a36Sopenharmony_ci{ 41562306a36Sopenharmony_ci struct iov_iter *iter = &msg->msg_iter; 41662306a36Sopenharmony_ci int peek = flags & MSG_PEEK; 41762306a36Sopenharmony_ci struct sk_msg *msg_rx; 41862306a36Sopenharmony_ci int i, copied = 0; 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_ci msg_rx = sk_psock_peek_msg(psock); 42162306a36Sopenharmony_ci while (copied != len) { 42262306a36Sopenharmony_ci struct scatterlist *sge; 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci if (unlikely(!msg_rx)) 42562306a36Sopenharmony_ci break; 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci i = msg_rx->sg.start; 42862306a36Sopenharmony_ci do { 42962306a36Sopenharmony_ci struct page *page; 43062306a36Sopenharmony_ci int copy; 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci sge = sk_msg_elem(msg_rx, i); 43362306a36Sopenharmony_ci copy = sge->length; 43462306a36Sopenharmony_ci page = sg_page(sge); 43562306a36Sopenharmony_ci if (copied + copy > len) 43662306a36Sopenharmony_ci copy = len - copied; 43762306a36Sopenharmony_ci copy = copy_page_to_iter(page, sge->offset, copy, iter); 43862306a36Sopenharmony_ci if (!copy) { 43962306a36Sopenharmony_ci copied = copied ? copied : -EFAULT; 44062306a36Sopenharmony_ci goto out; 44162306a36Sopenharmony_ci } 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci copied += copy; 44462306a36Sopenharmony_ci if (likely(!peek)) { 44562306a36Sopenharmony_ci sge->offset += copy; 44662306a36Sopenharmony_ci sge->length -= copy; 44762306a36Sopenharmony_ci if (!msg_rx->skb) 44862306a36Sopenharmony_ci sk_mem_uncharge(sk, copy); 44962306a36Sopenharmony_ci msg_rx->sg.size -= copy; 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci if (!sge->length) { 45262306a36Sopenharmony_ci sk_msg_iter_var_next(i); 45362306a36Sopenharmony_ci if (!msg_rx->skb) 45462306a36Sopenharmony_ci put_page(page); 45562306a36Sopenharmony_ci } 45662306a36Sopenharmony_ci } else { 45762306a36Sopenharmony_ci /* Lets not optimize peek case if copy_page_to_iter 45862306a36Sopenharmony_ci * didn't copy the entire length lets just break. 45962306a36Sopenharmony_ci */ 46062306a36Sopenharmony_ci if (copy != sge->length) 46162306a36Sopenharmony_ci goto out; 46262306a36Sopenharmony_ci sk_msg_iter_var_next(i); 46362306a36Sopenharmony_ci } 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci if (copied == len) 46662306a36Sopenharmony_ci break; 46762306a36Sopenharmony_ci } while ((i != msg_rx->sg.end) && !sg_is_last(sge)); 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci if (unlikely(peek)) { 47062306a36Sopenharmony_ci msg_rx = sk_psock_next_msg(psock, msg_rx); 47162306a36Sopenharmony_ci if (!msg_rx) 47262306a36Sopenharmony_ci break; 47362306a36Sopenharmony_ci continue; 47462306a36Sopenharmony_ci } 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci msg_rx->sg.start = i; 47762306a36Sopenharmony_ci if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) { 47862306a36Sopenharmony_ci msg_rx = sk_psock_dequeue_msg(psock); 47962306a36Sopenharmony_ci kfree_sk_msg(msg_rx); 48062306a36Sopenharmony_ci } 48162306a36Sopenharmony_ci msg_rx = sk_psock_peek_msg(psock); 48262306a36Sopenharmony_ci } 48362306a36Sopenharmony_ciout: 48462306a36Sopenharmony_ci return copied; 48562306a36Sopenharmony_ci} 48662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_recvmsg); 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_cibool sk_msg_is_readable(struct sock *sk) 48962306a36Sopenharmony_ci{ 49062306a36Sopenharmony_ci struct sk_psock *psock; 49162306a36Sopenharmony_ci bool empty = true; 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci rcu_read_lock(); 49462306a36Sopenharmony_ci psock = sk_psock(sk); 49562306a36Sopenharmony_ci if (likely(psock)) 49662306a36Sopenharmony_ci empty = list_empty(&psock->ingress_msg); 49762306a36Sopenharmony_ci rcu_read_unlock(); 49862306a36Sopenharmony_ci return !empty; 49962306a36Sopenharmony_ci} 50062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_msg_is_readable); 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_cistatic struct sk_msg *alloc_sk_msg(gfp_t gfp) 50362306a36Sopenharmony_ci{ 50462306a36Sopenharmony_ci struct sk_msg *msg; 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci msg = kzalloc(sizeof(*msg), gfp | __GFP_NOWARN); 50762306a36Sopenharmony_ci if (unlikely(!msg)) 50862306a36Sopenharmony_ci return NULL; 50962306a36Sopenharmony_ci sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS); 51062306a36Sopenharmony_ci return msg; 51162306a36Sopenharmony_ci} 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_cistatic struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, 51462306a36Sopenharmony_ci struct sk_buff *skb) 51562306a36Sopenharmony_ci{ 51662306a36Sopenharmony_ci if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) 51762306a36Sopenharmony_ci return NULL; 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci if (!sk_rmem_schedule(sk, skb, skb->truesize)) 52062306a36Sopenharmony_ci return NULL; 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci return alloc_sk_msg(GFP_KERNEL); 52362306a36Sopenharmony_ci} 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_cistatic int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, 52662306a36Sopenharmony_ci u32 off, u32 len, 52762306a36Sopenharmony_ci struct sk_psock *psock, 52862306a36Sopenharmony_ci struct sock *sk, 52962306a36Sopenharmony_ci struct sk_msg *msg) 53062306a36Sopenharmony_ci{ 53162306a36Sopenharmony_ci int num_sge, copied; 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); 53462306a36Sopenharmony_ci if (num_sge < 0) { 53562306a36Sopenharmony_ci /* skb linearize may fail with ENOMEM, but lets simply try again 53662306a36Sopenharmony_ci * later if this happens. Under memory pressure we don't want to 53762306a36Sopenharmony_ci * drop the skb. We need to linearize the skb so that the mapping 53862306a36Sopenharmony_ci * in skb_to_sgvec can not error. 53962306a36Sopenharmony_ci */ 54062306a36Sopenharmony_ci if (skb_linearize(skb)) 54162306a36Sopenharmony_ci return -EAGAIN; 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); 54462306a36Sopenharmony_ci if (unlikely(num_sge < 0)) 54562306a36Sopenharmony_ci return num_sge; 54662306a36Sopenharmony_ci } 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci copied = len; 54962306a36Sopenharmony_ci msg->sg.start = 0; 55062306a36Sopenharmony_ci msg->sg.size = copied; 55162306a36Sopenharmony_ci msg->sg.end = num_sge; 55262306a36Sopenharmony_ci msg->skb = skb; 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci sk_psock_queue_msg(psock, msg); 55562306a36Sopenharmony_ci sk_psock_data_ready(sk, psock); 55662306a36Sopenharmony_ci return copied; 55762306a36Sopenharmony_ci} 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_cistatic int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, 56062306a36Sopenharmony_ci u32 off, u32 len); 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_cistatic int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, 56362306a36Sopenharmony_ci u32 off, u32 len) 56462306a36Sopenharmony_ci{ 56562306a36Sopenharmony_ci struct sock *sk = psock->sk; 56662306a36Sopenharmony_ci struct sk_msg *msg; 56762306a36Sopenharmony_ci int err; 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci /* If we are receiving on the same sock skb->sk is already assigned, 57062306a36Sopenharmony_ci * skip memory accounting and owner transition seeing it already set 57162306a36Sopenharmony_ci * correctly. 57262306a36Sopenharmony_ci */ 57362306a36Sopenharmony_ci if (unlikely(skb->sk == sk)) 57462306a36Sopenharmony_ci return sk_psock_skb_ingress_self(psock, skb, off, len); 57562306a36Sopenharmony_ci msg = sk_psock_create_ingress_msg(sk, skb); 57662306a36Sopenharmony_ci if (!msg) 57762306a36Sopenharmony_ci return -EAGAIN; 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci /* This will transition ownership of the data from the socket where 58062306a36Sopenharmony_ci * the BPF program was run initiating the redirect to the socket 58162306a36Sopenharmony_ci * we will eventually receive this data on. The data will be released 58262306a36Sopenharmony_ci * from skb_consume found in __tcp_bpf_recvmsg() after its been copied 58362306a36Sopenharmony_ci * into user buffers. 58462306a36Sopenharmony_ci */ 58562306a36Sopenharmony_ci skb_set_owner_r(skb, sk); 58662306a36Sopenharmony_ci err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); 58762306a36Sopenharmony_ci if (err < 0) 58862306a36Sopenharmony_ci kfree(msg); 58962306a36Sopenharmony_ci return err; 59062306a36Sopenharmony_ci} 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci/* Puts an skb on the ingress queue of the socket already assigned to the 59362306a36Sopenharmony_ci * skb. In this case we do not need to check memory limits or skb_set_owner_r 59462306a36Sopenharmony_ci * because the skb is already accounted for here. 59562306a36Sopenharmony_ci */ 59662306a36Sopenharmony_cistatic int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, 59762306a36Sopenharmony_ci u32 off, u32 len) 59862306a36Sopenharmony_ci{ 59962306a36Sopenharmony_ci struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC); 60062306a36Sopenharmony_ci struct sock *sk = psock->sk; 60162306a36Sopenharmony_ci int err; 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci if (unlikely(!msg)) 60462306a36Sopenharmony_ci return -EAGAIN; 60562306a36Sopenharmony_ci skb_set_owner_r(skb, sk); 60662306a36Sopenharmony_ci err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); 60762306a36Sopenharmony_ci if (err < 0) 60862306a36Sopenharmony_ci kfree(msg); 60962306a36Sopenharmony_ci return err; 61062306a36Sopenharmony_ci} 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_cistatic int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, 61362306a36Sopenharmony_ci u32 off, u32 len, bool ingress) 61462306a36Sopenharmony_ci{ 61562306a36Sopenharmony_ci int err = 0; 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_ci if (!ingress) { 61862306a36Sopenharmony_ci if (!sock_writeable(psock->sk)) 61962306a36Sopenharmony_ci return -EAGAIN; 62062306a36Sopenharmony_ci return skb_send_sock(psock->sk, skb, off, len); 62162306a36Sopenharmony_ci } 62262306a36Sopenharmony_ci skb_get(skb); 62362306a36Sopenharmony_ci err = sk_psock_skb_ingress(psock, skb, off, len); 62462306a36Sopenharmony_ci if (err < 0) 62562306a36Sopenharmony_ci kfree_skb(skb); 62662306a36Sopenharmony_ci return err; 62762306a36Sopenharmony_ci} 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_cistatic void sk_psock_skb_state(struct sk_psock *psock, 63062306a36Sopenharmony_ci struct sk_psock_work_state *state, 63162306a36Sopenharmony_ci int len, int off) 63262306a36Sopenharmony_ci{ 63362306a36Sopenharmony_ci spin_lock_bh(&psock->ingress_lock); 63462306a36Sopenharmony_ci if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { 63562306a36Sopenharmony_ci state->len = len; 63662306a36Sopenharmony_ci state->off = off; 63762306a36Sopenharmony_ci } 63862306a36Sopenharmony_ci spin_unlock_bh(&psock->ingress_lock); 63962306a36Sopenharmony_ci} 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_cistatic void sk_psock_backlog(struct work_struct *work) 64262306a36Sopenharmony_ci{ 64362306a36Sopenharmony_ci struct delayed_work *dwork = to_delayed_work(work); 64462306a36Sopenharmony_ci struct sk_psock *psock = container_of(dwork, struct sk_psock, work); 64562306a36Sopenharmony_ci struct sk_psock_work_state *state = &psock->work_state; 64662306a36Sopenharmony_ci struct sk_buff *skb = NULL; 64762306a36Sopenharmony_ci u32 len = 0, off = 0; 64862306a36Sopenharmony_ci bool ingress; 64962306a36Sopenharmony_ci int ret; 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci mutex_lock(&psock->work_mutex); 65262306a36Sopenharmony_ci if (unlikely(state->len)) { 65362306a36Sopenharmony_ci len = state->len; 65462306a36Sopenharmony_ci off = state->off; 65562306a36Sopenharmony_ci } 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci while ((skb = skb_peek(&psock->ingress_skb))) { 65862306a36Sopenharmony_ci len = skb->len; 65962306a36Sopenharmony_ci off = 0; 66062306a36Sopenharmony_ci if (skb_bpf_strparser(skb)) { 66162306a36Sopenharmony_ci struct strp_msg *stm = strp_msg(skb); 66262306a36Sopenharmony_ci 66362306a36Sopenharmony_ci off = stm->offset; 66462306a36Sopenharmony_ci len = stm->full_len; 66562306a36Sopenharmony_ci } 66662306a36Sopenharmony_ci ingress = skb_bpf_ingress(skb); 66762306a36Sopenharmony_ci skb_bpf_redirect_clear(skb); 66862306a36Sopenharmony_ci do { 66962306a36Sopenharmony_ci ret = -EIO; 67062306a36Sopenharmony_ci if (!sock_flag(psock->sk, SOCK_DEAD)) 67162306a36Sopenharmony_ci ret = sk_psock_handle_skb(psock, skb, off, 67262306a36Sopenharmony_ci len, ingress); 67362306a36Sopenharmony_ci if (ret <= 0) { 67462306a36Sopenharmony_ci if (ret == -EAGAIN) { 67562306a36Sopenharmony_ci sk_psock_skb_state(psock, state, len, off); 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci /* Delay slightly to prioritize any 67862306a36Sopenharmony_ci * other work that might be here. 67962306a36Sopenharmony_ci */ 68062306a36Sopenharmony_ci if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) 68162306a36Sopenharmony_ci schedule_delayed_work(&psock->work, 1); 68262306a36Sopenharmony_ci goto end; 68362306a36Sopenharmony_ci } 68462306a36Sopenharmony_ci /* Hard errors break pipe and stop xmit. */ 68562306a36Sopenharmony_ci sk_psock_report_error(psock, ret ? -ret : EPIPE); 68662306a36Sopenharmony_ci sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); 68762306a36Sopenharmony_ci goto end; 68862306a36Sopenharmony_ci } 68962306a36Sopenharmony_ci off += ret; 69062306a36Sopenharmony_ci len -= ret; 69162306a36Sopenharmony_ci } while (len); 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci skb = skb_dequeue(&psock->ingress_skb); 69462306a36Sopenharmony_ci kfree_skb(skb); 69562306a36Sopenharmony_ci } 69662306a36Sopenharmony_ciend: 69762306a36Sopenharmony_ci mutex_unlock(&psock->work_mutex); 69862306a36Sopenharmony_ci} 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_cistruct sk_psock *sk_psock_init(struct sock *sk, int node) 70162306a36Sopenharmony_ci{ 70262306a36Sopenharmony_ci struct sk_psock *psock; 70362306a36Sopenharmony_ci struct proto *prot; 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci write_lock_bh(&sk->sk_callback_lock); 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) { 70862306a36Sopenharmony_ci psock = ERR_PTR(-EINVAL); 70962306a36Sopenharmony_ci goto out; 71062306a36Sopenharmony_ci } 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ci if (sk->sk_user_data) { 71362306a36Sopenharmony_ci psock = ERR_PTR(-EBUSY); 71462306a36Sopenharmony_ci goto out; 71562306a36Sopenharmony_ci } 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_ci psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node); 71862306a36Sopenharmony_ci if (!psock) { 71962306a36Sopenharmony_ci psock = ERR_PTR(-ENOMEM); 72062306a36Sopenharmony_ci goto out; 72162306a36Sopenharmony_ci } 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci prot = READ_ONCE(sk->sk_prot); 72462306a36Sopenharmony_ci psock->sk = sk; 72562306a36Sopenharmony_ci psock->eval = __SK_NONE; 72662306a36Sopenharmony_ci psock->sk_proto = prot; 72762306a36Sopenharmony_ci psock->saved_unhash = prot->unhash; 72862306a36Sopenharmony_ci psock->saved_destroy = prot->destroy; 72962306a36Sopenharmony_ci psock->saved_close = prot->close; 73062306a36Sopenharmony_ci psock->saved_write_space = sk->sk_write_space; 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci INIT_LIST_HEAD(&psock->link); 73362306a36Sopenharmony_ci spin_lock_init(&psock->link_lock); 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci INIT_DELAYED_WORK(&psock->work, sk_psock_backlog); 73662306a36Sopenharmony_ci mutex_init(&psock->work_mutex); 73762306a36Sopenharmony_ci INIT_LIST_HEAD(&psock->ingress_msg); 73862306a36Sopenharmony_ci spin_lock_init(&psock->ingress_lock); 73962306a36Sopenharmony_ci skb_queue_head_init(&psock->ingress_skb); 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED); 74262306a36Sopenharmony_ci refcount_set(&psock->refcnt, 1); 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci __rcu_assign_sk_user_data_with_flags(sk, psock, 74562306a36Sopenharmony_ci SK_USER_DATA_NOCOPY | 74662306a36Sopenharmony_ci SK_USER_DATA_PSOCK); 74762306a36Sopenharmony_ci sock_hold(sk); 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ciout: 75062306a36Sopenharmony_ci write_unlock_bh(&sk->sk_callback_lock); 75162306a36Sopenharmony_ci return psock; 75262306a36Sopenharmony_ci} 75362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_psock_init); 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_cistruct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock) 75662306a36Sopenharmony_ci{ 75762306a36Sopenharmony_ci struct sk_psock_link *link; 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci spin_lock_bh(&psock->link_lock); 76062306a36Sopenharmony_ci link = list_first_entry_or_null(&psock->link, struct sk_psock_link, 76162306a36Sopenharmony_ci list); 76262306a36Sopenharmony_ci if (link) 76362306a36Sopenharmony_ci list_del(&link->list); 76462306a36Sopenharmony_ci spin_unlock_bh(&psock->link_lock); 76562306a36Sopenharmony_ci return link; 76662306a36Sopenharmony_ci} 76762306a36Sopenharmony_ci 76862306a36Sopenharmony_cistatic void __sk_psock_purge_ingress_msg(struct sk_psock *psock) 76962306a36Sopenharmony_ci{ 77062306a36Sopenharmony_ci struct sk_msg *msg, *tmp; 77162306a36Sopenharmony_ci 77262306a36Sopenharmony_ci list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) { 77362306a36Sopenharmony_ci list_del(&msg->list); 77462306a36Sopenharmony_ci sk_msg_free(psock->sk, msg); 77562306a36Sopenharmony_ci kfree(msg); 77662306a36Sopenharmony_ci } 77762306a36Sopenharmony_ci} 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_cistatic void __sk_psock_zap_ingress(struct sk_psock *psock) 78062306a36Sopenharmony_ci{ 78162306a36Sopenharmony_ci struct sk_buff *skb; 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) { 78462306a36Sopenharmony_ci skb_bpf_redirect_clear(skb); 78562306a36Sopenharmony_ci sock_drop(psock->sk, skb); 78662306a36Sopenharmony_ci } 78762306a36Sopenharmony_ci __sk_psock_purge_ingress_msg(psock); 78862306a36Sopenharmony_ci} 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_cistatic void sk_psock_link_destroy(struct sk_psock *psock) 79162306a36Sopenharmony_ci{ 79262306a36Sopenharmony_ci struct sk_psock_link *link, *tmp; 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci list_for_each_entry_safe(link, tmp, &psock->link, list) { 79562306a36Sopenharmony_ci list_del(&link->list); 79662306a36Sopenharmony_ci sk_psock_free_link(link); 79762306a36Sopenharmony_ci } 79862306a36Sopenharmony_ci} 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_civoid sk_psock_stop(struct sk_psock *psock) 80162306a36Sopenharmony_ci{ 80262306a36Sopenharmony_ci spin_lock_bh(&psock->ingress_lock); 80362306a36Sopenharmony_ci sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); 80462306a36Sopenharmony_ci sk_psock_cork_free(psock); 80562306a36Sopenharmony_ci spin_unlock_bh(&psock->ingress_lock); 80662306a36Sopenharmony_ci} 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_cistatic void sk_psock_done_strp(struct sk_psock *psock); 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_cistatic void sk_psock_destroy(struct work_struct *work) 81162306a36Sopenharmony_ci{ 81262306a36Sopenharmony_ci struct sk_psock *psock = container_of(to_rcu_work(work), 81362306a36Sopenharmony_ci struct sk_psock, rwork); 81462306a36Sopenharmony_ci /* No sk_callback_lock since already detached. */ 81562306a36Sopenharmony_ci 81662306a36Sopenharmony_ci sk_psock_done_strp(psock); 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci cancel_delayed_work_sync(&psock->work); 81962306a36Sopenharmony_ci __sk_psock_zap_ingress(psock); 82062306a36Sopenharmony_ci mutex_destroy(&psock->work_mutex); 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci psock_progs_drop(&psock->progs); 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ci sk_psock_link_destroy(psock); 82562306a36Sopenharmony_ci sk_psock_cork_free(psock); 82662306a36Sopenharmony_ci 82762306a36Sopenharmony_ci if (psock->sk_redir) 82862306a36Sopenharmony_ci sock_put(psock->sk_redir); 82962306a36Sopenharmony_ci if (psock->sk_pair) 83062306a36Sopenharmony_ci sock_put(psock->sk_pair); 83162306a36Sopenharmony_ci sock_put(psock->sk); 83262306a36Sopenharmony_ci kfree(psock); 83362306a36Sopenharmony_ci} 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_civoid sk_psock_drop(struct sock *sk, struct sk_psock *psock) 83662306a36Sopenharmony_ci{ 83762306a36Sopenharmony_ci write_lock_bh(&sk->sk_callback_lock); 83862306a36Sopenharmony_ci sk_psock_restore_proto(sk, psock); 83962306a36Sopenharmony_ci rcu_assign_sk_user_data(sk, NULL); 84062306a36Sopenharmony_ci if (psock->progs.stream_parser) 84162306a36Sopenharmony_ci sk_psock_stop_strp(sk, psock); 84262306a36Sopenharmony_ci else if (psock->progs.stream_verdict || psock->progs.skb_verdict) 84362306a36Sopenharmony_ci sk_psock_stop_verdict(sk, psock); 84462306a36Sopenharmony_ci write_unlock_bh(&sk->sk_callback_lock); 84562306a36Sopenharmony_ci 84662306a36Sopenharmony_ci sk_psock_stop(psock); 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); 84962306a36Sopenharmony_ci queue_rcu_work(system_wq, &psock->rwork); 85062306a36Sopenharmony_ci} 85162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_psock_drop); 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_cistatic int sk_psock_map_verd(int verdict, bool redir) 85462306a36Sopenharmony_ci{ 85562306a36Sopenharmony_ci switch (verdict) { 85662306a36Sopenharmony_ci case SK_PASS: 85762306a36Sopenharmony_ci return redir ? __SK_REDIRECT : __SK_PASS; 85862306a36Sopenharmony_ci case SK_DROP: 85962306a36Sopenharmony_ci default: 86062306a36Sopenharmony_ci break; 86162306a36Sopenharmony_ci } 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci return __SK_DROP; 86462306a36Sopenharmony_ci} 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_ciint sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, 86762306a36Sopenharmony_ci struct sk_msg *msg) 86862306a36Sopenharmony_ci{ 86962306a36Sopenharmony_ci struct bpf_prog *prog; 87062306a36Sopenharmony_ci int ret; 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci rcu_read_lock(); 87362306a36Sopenharmony_ci prog = READ_ONCE(psock->progs.msg_parser); 87462306a36Sopenharmony_ci if (unlikely(!prog)) { 87562306a36Sopenharmony_ci ret = __SK_PASS; 87662306a36Sopenharmony_ci goto out; 87762306a36Sopenharmony_ci } 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci sk_msg_compute_data_pointers(msg); 88062306a36Sopenharmony_ci msg->sk = sk; 88162306a36Sopenharmony_ci ret = bpf_prog_run_pin_on_cpu(prog, msg); 88262306a36Sopenharmony_ci ret = sk_psock_map_verd(ret, msg->sk_redir); 88362306a36Sopenharmony_ci psock->apply_bytes = msg->apply_bytes; 88462306a36Sopenharmony_ci if (ret == __SK_REDIRECT) { 88562306a36Sopenharmony_ci if (psock->sk_redir) { 88662306a36Sopenharmony_ci sock_put(psock->sk_redir); 88762306a36Sopenharmony_ci psock->sk_redir = NULL; 88862306a36Sopenharmony_ci } 88962306a36Sopenharmony_ci if (!msg->sk_redir) { 89062306a36Sopenharmony_ci ret = __SK_DROP; 89162306a36Sopenharmony_ci goto out; 89262306a36Sopenharmony_ci } 89362306a36Sopenharmony_ci psock->redir_ingress = sk_msg_to_ingress(msg); 89462306a36Sopenharmony_ci psock->sk_redir = msg->sk_redir; 89562306a36Sopenharmony_ci sock_hold(psock->sk_redir); 89662306a36Sopenharmony_ci } 89762306a36Sopenharmony_ciout: 89862306a36Sopenharmony_ci rcu_read_unlock(); 89962306a36Sopenharmony_ci return ret; 90062306a36Sopenharmony_ci} 90162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_psock_msg_verdict); 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_cistatic int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) 90462306a36Sopenharmony_ci{ 90562306a36Sopenharmony_ci struct sk_psock *psock_other; 90662306a36Sopenharmony_ci struct sock *sk_other; 90762306a36Sopenharmony_ci 90862306a36Sopenharmony_ci sk_other = skb_bpf_redirect_fetch(skb); 90962306a36Sopenharmony_ci /* This error is a buggy BPF program, it returned a redirect 91062306a36Sopenharmony_ci * return code, but then didn't set a redirect interface. 91162306a36Sopenharmony_ci */ 91262306a36Sopenharmony_ci if (unlikely(!sk_other)) { 91362306a36Sopenharmony_ci skb_bpf_redirect_clear(skb); 91462306a36Sopenharmony_ci sock_drop(from->sk, skb); 91562306a36Sopenharmony_ci return -EIO; 91662306a36Sopenharmony_ci } 91762306a36Sopenharmony_ci psock_other = sk_psock(sk_other); 91862306a36Sopenharmony_ci /* This error indicates the socket is being torn down or had another 91962306a36Sopenharmony_ci * error that caused the pipe to break. We can't send a packet on 92062306a36Sopenharmony_ci * a socket that is in this state so we drop the skb. 92162306a36Sopenharmony_ci */ 92262306a36Sopenharmony_ci if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) { 92362306a36Sopenharmony_ci skb_bpf_redirect_clear(skb); 92462306a36Sopenharmony_ci sock_drop(from->sk, skb); 92562306a36Sopenharmony_ci return -EIO; 92662306a36Sopenharmony_ci } 92762306a36Sopenharmony_ci spin_lock_bh(&psock_other->ingress_lock); 92862306a36Sopenharmony_ci if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { 92962306a36Sopenharmony_ci spin_unlock_bh(&psock_other->ingress_lock); 93062306a36Sopenharmony_ci skb_bpf_redirect_clear(skb); 93162306a36Sopenharmony_ci sock_drop(from->sk, skb); 93262306a36Sopenharmony_ci return -EIO; 93362306a36Sopenharmony_ci } 93462306a36Sopenharmony_ci 93562306a36Sopenharmony_ci skb_queue_tail(&psock_other->ingress_skb, skb); 93662306a36Sopenharmony_ci schedule_delayed_work(&psock_other->work, 0); 93762306a36Sopenharmony_ci spin_unlock_bh(&psock_other->ingress_lock); 93862306a36Sopenharmony_ci return 0; 93962306a36Sopenharmony_ci} 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_cistatic void sk_psock_tls_verdict_apply(struct sk_buff *skb, 94262306a36Sopenharmony_ci struct sk_psock *from, int verdict) 94362306a36Sopenharmony_ci{ 94462306a36Sopenharmony_ci switch (verdict) { 94562306a36Sopenharmony_ci case __SK_REDIRECT: 94662306a36Sopenharmony_ci sk_psock_skb_redirect(from, skb); 94762306a36Sopenharmony_ci break; 94862306a36Sopenharmony_ci case __SK_PASS: 94962306a36Sopenharmony_ci case __SK_DROP: 95062306a36Sopenharmony_ci default: 95162306a36Sopenharmony_ci break; 95262306a36Sopenharmony_ci } 95362306a36Sopenharmony_ci} 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ciint sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) 95662306a36Sopenharmony_ci{ 95762306a36Sopenharmony_ci struct bpf_prog *prog; 95862306a36Sopenharmony_ci int ret = __SK_PASS; 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci rcu_read_lock(); 96162306a36Sopenharmony_ci prog = READ_ONCE(psock->progs.stream_verdict); 96262306a36Sopenharmony_ci if (likely(prog)) { 96362306a36Sopenharmony_ci skb->sk = psock->sk; 96462306a36Sopenharmony_ci skb_dst_drop(skb); 96562306a36Sopenharmony_ci skb_bpf_redirect_clear(skb); 96662306a36Sopenharmony_ci ret = bpf_prog_run_pin_on_cpu(prog, skb); 96762306a36Sopenharmony_ci ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); 96862306a36Sopenharmony_ci skb->sk = NULL; 96962306a36Sopenharmony_ci } 97062306a36Sopenharmony_ci sk_psock_tls_verdict_apply(skb, psock, ret); 97162306a36Sopenharmony_ci rcu_read_unlock(); 97262306a36Sopenharmony_ci return ret; 97362306a36Sopenharmony_ci} 97462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_cistatic int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, 97762306a36Sopenharmony_ci int verdict) 97862306a36Sopenharmony_ci{ 97962306a36Sopenharmony_ci struct sock *sk_other; 98062306a36Sopenharmony_ci int err = 0; 98162306a36Sopenharmony_ci u32 len, off; 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_ci switch (verdict) { 98462306a36Sopenharmony_ci case __SK_PASS: 98562306a36Sopenharmony_ci err = -EIO; 98662306a36Sopenharmony_ci sk_other = psock->sk; 98762306a36Sopenharmony_ci if (sock_flag(sk_other, SOCK_DEAD) || 98862306a36Sopenharmony_ci !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) 98962306a36Sopenharmony_ci goto out_free; 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci skb_bpf_set_ingress(skb); 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci /* If the queue is empty then we can submit directly 99462306a36Sopenharmony_ci * into the msg queue. If its not empty we have to 99562306a36Sopenharmony_ci * queue work otherwise we may get OOO data. Otherwise, 99662306a36Sopenharmony_ci * if sk_psock_skb_ingress errors will be handled by 99762306a36Sopenharmony_ci * retrying later from workqueue. 99862306a36Sopenharmony_ci */ 99962306a36Sopenharmony_ci if (skb_queue_empty(&psock->ingress_skb)) { 100062306a36Sopenharmony_ci len = skb->len; 100162306a36Sopenharmony_ci off = 0; 100262306a36Sopenharmony_ci if (skb_bpf_strparser(skb)) { 100362306a36Sopenharmony_ci struct strp_msg *stm = strp_msg(skb); 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci off = stm->offset; 100662306a36Sopenharmony_ci len = stm->full_len; 100762306a36Sopenharmony_ci } 100862306a36Sopenharmony_ci err = sk_psock_skb_ingress_self(psock, skb, off, len); 100962306a36Sopenharmony_ci } 101062306a36Sopenharmony_ci if (err < 0) { 101162306a36Sopenharmony_ci spin_lock_bh(&psock->ingress_lock); 101262306a36Sopenharmony_ci if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { 101362306a36Sopenharmony_ci skb_queue_tail(&psock->ingress_skb, skb); 101462306a36Sopenharmony_ci schedule_delayed_work(&psock->work, 0); 101562306a36Sopenharmony_ci err = 0; 101662306a36Sopenharmony_ci } 101762306a36Sopenharmony_ci spin_unlock_bh(&psock->ingress_lock); 101862306a36Sopenharmony_ci if (err < 0) 101962306a36Sopenharmony_ci goto out_free; 102062306a36Sopenharmony_ci } 102162306a36Sopenharmony_ci break; 102262306a36Sopenharmony_ci case __SK_REDIRECT: 102362306a36Sopenharmony_ci tcp_eat_skb(psock->sk, skb); 102462306a36Sopenharmony_ci err = sk_psock_skb_redirect(psock, skb); 102562306a36Sopenharmony_ci break; 102662306a36Sopenharmony_ci case __SK_DROP: 102762306a36Sopenharmony_ci default: 102862306a36Sopenharmony_ciout_free: 102962306a36Sopenharmony_ci skb_bpf_redirect_clear(skb); 103062306a36Sopenharmony_ci tcp_eat_skb(psock->sk, skb); 103162306a36Sopenharmony_ci sock_drop(psock->sk, skb); 103262306a36Sopenharmony_ci } 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_ci return err; 103562306a36Sopenharmony_ci} 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_cistatic void sk_psock_write_space(struct sock *sk) 103862306a36Sopenharmony_ci{ 103962306a36Sopenharmony_ci struct sk_psock *psock; 104062306a36Sopenharmony_ci void (*write_space)(struct sock *sk) = NULL; 104162306a36Sopenharmony_ci 104262306a36Sopenharmony_ci rcu_read_lock(); 104362306a36Sopenharmony_ci psock = sk_psock(sk); 104462306a36Sopenharmony_ci if (likely(psock)) { 104562306a36Sopenharmony_ci if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) 104662306a36Sopenharmony_ci schedule_delayed_work(&psock->work, 0); 104762306a36Sopenharmony_ci write_space = psock->saved_write_space; 104862306a36Sopenharmony_ci } 104962306a36Sopenharmony_ci rcu_read_unlock(); 105062306a36Sopenharmony_ci if (write_space) 105162306a36Sopenharmony_ci write_space(sk); 105262306a36Sopenharmony_ci} 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) 105562306a36Sopenharmony_cistatic void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) 105662306a36Sopenharmony_ci{ 105762306a36Sopenharmony_ci struct sk_psock *psock; 105862306a36Sopenharmony_ci struct bpf_prog *prog; 105962306a36Sopenharmony_ci int ret = __SK_DROP; 106062306a36Sopenharmony_ci struct sock *sk; 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci rcu_read_lock(); 106362306a36Sopenharmony_ci sk = strp->sk; 106462306a36Sopenharmony_ci psock = sk_psock(sk); 106562306a36Sopenharmony_ci if (unlikely(!psock)) { 106662306a36Sopenharmony_ci sock_drop(sk, skb); 106762306a36Sopenharmony_ci goto out; 106862306a36Sopenharmony_ci } 106962306a36Sopenharmony_ci prog = READ_ONCE(psock->progs.stream_verdict); 107062306a36Sopenharmony_ci if (likely(prog)) { 107162306a36Sopenharmony_ci skb->sk = sk; 107262306a36Sopenharmony_ci skb_dst_drop(skb); 107362306a36Sopenharmony_ci skb_bpf_redirect_clear(skb); 107462306a36Sopenharmony_ci ret = bpf_prog_run_pin_on_cpu(prog, skb); 107562306a36Sopenharmony_ci skb_bpf_set_strparser(skb); 107662306a36Sopenharmony_ci ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); 107762306a36Sopenharmony_ci skb->sk = NULL; 107862306a36Sopenharmony_ci } 107962306a36Sopenharmony_ci sk_psock_verdict_apply(psock, skb, ret); 108062306a36Sopenharmony_ciout: 108162306a36Sopenharmony_ci rcu_read_unlock(); 108262306a36Sopenharmony_ci} 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_cistatic int sk_psock_strp_read_done(struct strparser *strp, int err) 108562306a36Sopenharmony_ci{ 108662306a36Sopenharmony_ci return err; 108762306a36Sopenharmony_ci} 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_cistatic int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb) 109062306a36Sopenharmony_ci{ 109162306a36Sopenharmony_ci struct sk_psock *psock = container_of(strp, struct sk_psock, strp); 109262306a36Sopenharmony_ci struct bpf_prog *prog; 109362306a36Sopenharmony_ci int ret = skb->len; 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_ci rcu_read_lock(); 109662306a36Sopenharmony_ci prog = READ_ONCE(psock->progs.stream_parser); 109762306a36Sopenharmony_ci if (likely(prog)) { 109862306a36Sopenharmony_ci skb->sk = psock->sk; 109962306a36Sopenharmony_ci ret = bpf_prog_run_pin_on_cpu(prog, skb); 110062306a36Sopenharmony_ci skb->sk = NULL; 110162306a36Sopenharmony_ci } 110262306a36Sopenharmony_ci rcu_read_unlock(); 110362306a36Sopenharmony_ci return ret; 110462306a36Sopenharmony_ci} 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci/* Called with socket lock held. */ 110762306a36Sopenharmony_cistatic void sk_psock_strp_data_ready(struct sock *sk) 110862306a36Sopenharmony_ci{ 110962306a36Sopenharmony_ci struct sk_psock *psock; 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci trace_sk_data_ready(sk); 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci rcu_read_lock(); 111462306a36Sopenharmony_ci psock = sk_psock(sk); 111562306a36Sopenharmony_ci if (likely(psock)) { 111662306a36Sopenharmony_ci if (tls_sw_has_ctx_rx(sk)) { 111762306a36Sopenharmony_ci psock->saved_data_ready(sk); 111862306a36Sopenharmony_ci } else { 111962306a36Sopenharmony_ci write_lock_bh(&sk->sk_callback_lock); 112062306a36Sopenharmony_ci strp_data_ready(&psock->strp); 112162306a36Sopenharmony_ci write_unlock_bh(&sk->sk_callback_lock); 112262306a36Sopenharmony_ci } 112362306a36Sopenharmony_ci } 112462306a36Sopenharmony_ci rcu_read_unlock(); 112562306a36Sopenharmony_ci} 112662306a36Sopenharmony_ci 112762306a36Sopenharmony_ciint sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) 112862306a36Sopenharmony_ci{ 112962306a36Sopenharmony_ci int ret; 113062306a36Sopenharmony_ci 113162306a36Sopenharmony_ci static const struct strp_callbacks cb = { 113262306a36Sopenharmony_ci .rcv_msg = sk_psock_strp_read, 113362306a36Sopenharmony_ci .read_sock_done = sk_psock_strp_read_done, 113462306a36Sopenharmony_ci .parse_msg = sk_psock_strp_parse, 113562306a36Sopenharmony_ci }; 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_ci ret = strp_init(&psock->strp, sk, &cb); 113862306a36Sopenharmony_ci if (!ret) 113962306a36Sopenharmony_ci sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED); 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci return ret; 114262306a36Sopenharmony_ci} 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_civoid sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) 114562306a36Sopenharmony_ci{ 114662306a36Sopenharmony_ci if (psock->saved_data_ready) 114762306a36Sopenharmony_ci return; 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci psock->saved_data_ready = sk->sk_data_ready; 115062306a36Sopenharmony_ci sk->sk_data_ready = sk_psock_strp_data_ready; 115162306a36Sopenharmony_ci sk->sk_write_space = sk_psock_write_space; 115262306a36Sopenharmony_ci} 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_civoid sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) 115562306a36Sopenharmony_ci{ 115662306a36Sopenharmony_ci psock_set_prog(&psock->progs.stream_parser, NULL); 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci if (!psock->saved_data_ready) 115962306a36Sopenharmony_ci return; 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci sk->sk_data_ready = psock->saved_data_ready; 116262306a36Sopenharmony_ci psock->saved_data_ready = NULL; 116362306a36Sopenharmony_ci strp_stop(&psock->strp); 116462306a36Sopenharmony_ci} 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_cistatic void sk_psock_done_strp(struct sk_psock *psock) 116762306a36Sopenharmony_ci{ 116862306a36Sopenharmony_ci /* Parser has been stopped */ 116962306a36Sopenharmony_ci if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED)) 117062306a36Sopenharmony_ci strp_done(&psock->strp); 117162306a36Sopenharmony_ci} 117262306a36Sopenharmony_ci#else 117362306a36Sopenharmony_cistatic void sk_psock_done_strp(struct sk_psock *psock) 117462306a36Sopenharmony_ci{ 117562306a36Sopenharmony_ci} 117662306a36Sopenharmony_ci#endif /* CONFIG_BPF_STREAM_PARSER */ 117762306a36Sopenharmony_ci 117862306a36Sopenharmony_cistatic int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb) 117962306a36Sopenharmony_ci{ 118062306a36Sopenharmony_ci struct sk_psock *psock; 118162306a36Sopenharmony_ci struct bpf_prog *prog; 118262306a36Sopenharmony_ci int ret = __SK_DROP; 118362306a36Sopenharmony_ci int len = skb->len; 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_ci rcu_read_lock(); 118662306a36Sopenharmony_ci psock = sk_psock(sk); 118762306a36Sopenharmony_ci if (unlikely(!psock)) { 118862306a36Sopenharmony_ci len = 0; 118962306a36Sopenharmony_ci tcp_eat_skb(sk, skb); 119062306a36Sopenharmony_ci sock_drop(sk, skb); 119162306a36Sopenharmony_ci goto out; 119262306a36Sopenharmony_ci } 119362306a36Sopenharmony_ci prog = READ_ONCE(psock->progs.stream_verdict); 119462306a36Sopenharmony_ci if (!prog) 119562306a36Sopenharmony_ci prog = READ_ONCE(psock->progs.skb_verdict); 119662306a36Sopenharmony_ci if (likely(prog)) { 119762306a36Sopenharmony_ci skb_dst_drop(skb); 119862306a36Sopenharmony_ci skb_bpf_redirect_clear(skb); 119962306a36Sopenharmony_ci ret = bpf_prog_run_pin_on_cpu(prog, skb); 120062306a36Sopenharmony_ci ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); 120162306a36Sopenharmony_ci } 120262306a36Sopenharmony_ci ret = sk_psock_verdict_apply(psock, skb, ret); 120362306a36Sopenharmony_ci if (ret < 0) 120462306a36Sopenharmony_ci len = ret; 120562306a36Sopenharmony_ciout: 120662306a36Sopenharmony_ci rcu_read_unlock(); 120762306a36Sopenharmony_ci return len; 120862306a36Sopenharmony_ci} 120962306a36Sopenharmony_ci 121062306a36Sopenharmony_cistatic void sk_psock_verdict_data_ready(struct sock *sk) 121162306a36Sopenharmony_ci{ 121262306a36Sopenharmony_ci struct socket *sock = sk->sk_socket; 121362306a36Sopenharmony_ci const struct proto_ops *ops; 121462306a36Sopenharmony_ci int copied; 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci trace_sk_data_ready(sk); 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci if (unlikely(!sock)) 121962306a36Sopenharmony_ci return; 122062306a36Sopenharmony_ci ops = READ_ONCE(sock->ops); 122162306a36Sopenharmony_ci if (!ops || !ops->read_skb) 122262306a36Sopenharmony_ci return; 122362306a36Sopenharmony_ci copied = ops->read_skb(sk, sk_psock_verdict_recv); 122462306a36Sopenharmony_ci if (copied >= 0) { 122562306a36Sopenharmony_ci struct sk_psock *psock; 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci rcu_read_lock(); 122862306a36Sopenharmony_ci psock = sk_psock(sk); 122962306a36Sopenharmony_ci if (psock) { 123062306a36Sopenharmony_ci read_lock_bh(&sk->sk_callback_lock); 123162306a36Sopenharmony_ci sk_psock_data_ready(sk, psock); 123262306a36Sopenharmony_ci read_unlock_bh(&sk->sk_callback_lock); 123362306a36Sopenharmony_ci } 123462306a36Sopenharmony_ci rcu_read_unlock(); 123562306a36Sopenharmony_ci } 123662306a36Sopenharmony_ci} 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_civoid sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) 123962306a36Sopenharmony_ci{ 124062306a36Sopenharmony_ci if (psock->saved_data_ready) 124162306a36Sopenharmony_ci return; 124262306a36Sopenharmony_ci 124362306a36Sopenharmony_ci psock->saved_data_ready = sk->sk_data_ready; 124462306a36Sopenharmony_ci sk->sk_data_ready = sk_psock_verdict_data_ready; 124562306a36Sopenharmony_ci sk->sk_write_space = sk_psock_write_space; 124662306a36Sopenharmony_ci} 124762306a36Sopenharmony_ci 124862306a36Sopenharmony_civoid sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) 124962306a36Sopenharmony_ci{ 125062306a36Sopenharmony_ci psock_set_prog(&psock->progs.stream_verdict, NULL); 125162306a36Sopenharmony_ci psock_set_prog(&psock->progs.skb_verdict, NULL); 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci if (!psock->saved_data_ready) 125462306a36Sopenharmony_ci return; 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_ci sk->sk_data_ready = psock->saved_data_ready; 125762306a36Sopenharmony_ci psock->saved_data_ready = NULL; 125862306a36Sopenharmony_ci} 1259