18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 28c2ecf20Sopenharmony_ci#ifndef __BPF_TCP_HELPERS_H 38c2ecf20Sopenharmony_ci#define __BPF_TCP_HELPERS_H 48c2ecf20Sopenharmony_ci 58c2ecf20Sopenharmony_ci#include <stdbool.h> 68c2ecf20Sopenharmony_ci#include <linux/types.h> 78c2ecf20Sopenharmony_ci#include <bpf/bpf_helpers.h> 88c2ecf20Sopenharmony_ci#include <bpf/bpf_core_read.h> 98c2ecf20Sopenharmony_ci#include <bpf/bpf_tracing.h> 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci#define BPF_STRUCT_OPS(name, args...) \ 128c2ecf20Sopenharmony_ciSEC("struct_ops/"#name) \ 138c2ecf20Sopenharmony_ciBPF_PROG(name, args) 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci#define tcp_jiffies32 ((__u32)bpf_jiffies64()) 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_cistruct sock_common { 188c2ecf20Sopenharmony_ci unsigned char skc_state; 198c2ecf20Sopenharmony_ci __u16 skc_num; 208c2ecf20Sopenharmony_ci} __attribute__((preserve_access_index)); 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_cienum sk_pacing { 238c2ecf20Sopenharmony_ci SK_PACING_NONE = 0, 248c2ecf20Sopenharmony_ci SK_PACING_NEEDED = 1, 258c2ecf20Sopenharmony_ci SK_PACING_FQ = 2, 268c2ecf20Sopenharmony_ci}; 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_cistruct sock { 298c2ecf20Sopenharmony_ci struct sock_common __sk_common; 308c2ecf20Sopenharmony_ci unsigned long sk_pacing_rate; 318c2ecf20Sopenharmony_ci __u32 sk_pacing_status; /* see enum sk_pacing */ 328c2ecf20Sopenharmony_ci} __attribute__((preserve_access_index)); 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_cistruct inet_sock { 358c2ecf20Sopenharmony_ci struct sock sk; 368c2ecf20Sopenharmony_ci} __attribute__((preserve_access_index)); 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_cistruct inet_connection_sock { 398c2ecf20Sopenharmony_ci struct inet_sock icsk_inet; 408c2ecf20Sopenharmony_ci __u8 icsk_ca_state:6, 418c2ecf20Sopenharmony_ci icsk_ca_setsockopt:1, 428c2ecf20Sopenharmony_ci icsk_ca_dst_locked:1; 438c2ecf20Sopenharmony_ci struct { 448c2ecf20Sopenharmony_ci __u8 pending; 458c2ecf20Sopenharmony_ci } icsk_ack; 468c2ecf20Sopenharmony_ci __u64 icsk_ca_priv[104 / sizeof(__u64)]; 478c2ecf20Sopenharmony_ci} __attribute__((preserve_access_index)); 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_cistruct request_sock { 508c2ecf20Sopenharmony_ci struct sock_common __req_common; 518c2ecf20Sopenharmony_ci} __attribute__((preserve_access_index)); 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_cistruct tcp_sock { 548c2ecf20Sopenharmony_ci struct inet_connection_sock inet_conn; 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci __u32 rcv_nxt; 578c2ecf20Sopenharmony_ci __u32 snd_nxt; 588c2ecf20Sopenharmony_ci __u32 snd_una; 598c2ecf20Sopenharmony_ci __u8 ecn_flags; 608c2ecf20Sopenharmony_ci __u32 delivered; 618c2ecf20Sopenharmony_ci __u32 delivered_ce; 628c2ecf20Sopenharmony_ci __u32 snd_cwnd; 638c2ecf20Sopenharmony_ci __u32 snd_cwnd_cnt; 648c2ecf20Sopenharmony_ci __u32 snd_cwnd_clamp; 658c2ecf20Sopenharmony_ci __u32 snd_ssthresh; 668c2ecf20Sopenharmony_ci __u8 syn_data:1, /* SYN includes data */ 678c2ecf20Sopenharmony_ci syn_fastopen:1, /* SYN includes Fast Open option */ 688c2ecf20Sopenharmony_ci syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ 698c2ecf20Sopenharmony_ci syn_fastopen_ch:1, /* Active TFO re-enabling probe */ 708c2ecf20Sopenharmony_ci syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ 718c2ecf20Sopenharmony_ci save_syn:1, /* Save headers of SYN packet */ 728c2ecf20Sopenharmony_ci is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ 738c2ecf20Sopenharmony_ci syn_smc:1; /* SYN includes SMC */ 748c2ecf20Sopenharmony_ci __u32 max_packets_out; 758c2ecf20Sopenharmony_ci __u32 lsndtime; 768c2ecf20Sopenharmony_ci __u32 prior_cwnd; 778c2ecf20Sopenharmony_ci __u64 tcp_mstamp; /* most recent packet received/sent */ 788c2ecf20Sopenharmony_ci} __attribute__((preserve_access_index)); 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_cistatic __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk) 818c2ecf20Sopenharmony_ci{ 828c2ecf20Sopenharmony_ci return (struct inet_connection_sock *)sk; 838c2ecf20Sopenharmony_ci} 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_cistatic __always_inline void *inet_csk_ca(const struct sock *sk) 868c2ecf20Sopenharmony_ci{ 878c2ecf20Sopenharmony_ci return (void *)inet_csk(sk)->icsk_ca_priv; 888c2ecf20Sopenharmony_ci} 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_cistatic __always_inline struct tcp_sock *tcp_sk(const struct sock *sk) 918c2ecf20Sopenharmony_ci{ 928c2ecf20Sopenharmony_ci return (struct tcp_sock *)sk; 938c2ecf20Sopenharmony_ci} 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_cistatic __always_inline bool before(__u32 seq1, __u32 seq2) 968c2ecf20Sopenharmony_ci{ 978c2ecf20Sopenharmony_ci return (__s32)(seq1-seq2) < 0; 988c2ecf20Sopenharmony_ci} 998c2ecf20Sopenharmony_ci#define after(seq2, seq1) before(seq1, seq2) 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci#define TCP_ECN_OK 1 1028c2ecf20Sopenharmony_ci#define TCP_ECN_QUEUE_CWR 2 1038c2ecf20Sopenharmony_ci#define TCP_ECN_DEMAND_CWR 4 1048c2ecf20Sopenharmony_ci#define TCP_ECN_SEEN 8 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_cienum inet_csk_ack_state_t { 1078c2ecf20Sopenharmony_ci ICSK_ACK_SCHED = 1, 1088c2ecf20Sopenharmony_ci ICSK_ACK_TIMER = 2, 1098c2ecf20Sopenharmony_ci ICSK_ACK_PUSHED = 4, 1108c2ecf20Sopenharmony_ci ICSK_ACK_PUSHED2 = 8, 1118c2ecf20Sopenharmony_ci ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ 1128c2ecf20Sopenharmony_ci}; 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_cienum tcp_ca_event { 1158c2ecf20Sopenharmony_ci CA_EVENT_TX_START = 0, 1168c2ecf20Sopenharmony_ci CA_EVENT_CWND_RESTART = 1, 1178c2ecf20Sopenharmony_ci CA_EVENT_COMPLETE_CWR = 2, 1188c2ecf20Sopenharmony_ci CA_EVENT_LOSS = 3, 1198c2ecf20Sopenharmony_ci CA_EVENT_ECN_NO_CE = 4, 1208c2ecf20Sopenharmony_ci CA_EVENT_ECN_IS_CE = 5, 1218c2ecf20Sopenharmony_ci}; 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_cistruct ack_sample { 1248c2ecf20Sopenharmony_ci __u32 pkts_acked; 1258c2ecf20Sopenharmony_ci __s32 rtt_us; 1268c2ecf20Sopenharmony_ci __u32 in_flight; 1278c2ecf20Sopenharmony_ci} __attribute__((preserve_access_index)); 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_cistruct rate_sample { 1308c2ecf20Sopenharmony_ci __u64 prior_mstamp; /* starting timestamp for interval */ 1318c2ecf20Sopenharmony_ci __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ 1328c2ecf20Sopenharmony_ci __s32 delivered; /* number of packets delivered over interval */ 1338c2ecf20Sopenharmony_ci long interval_us; /* time for tp->delivered to incr "delivered" */ 1348c2ecf20Sopenharmony_ci __u32 snd_interval_us; /* snd interval for delivered packets */ 1358c2ecf20Sopenharmony_ci __u32 rcv_interval_us; /* rcv interval for delivered packets */ 1368c2ecf20Sopenharmony_ci long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ 1378c2ecf20Sopenharmony_ci int losses; /* number of packets marked lost upon ACK */ 1388c2ecf20Sopenharmony_ci __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ 1398c2ecf20Sopenharmony_ci __u32 prior_in_flight; /* in flight before this ACK */ 1408c2ecf20Sopenharmony_ci bool is_app_limited; /* is sample from packet with bubble in pipe? */ 1418c2ecf20Sopenharmony_ci bool is_retrans; /* is sample from retransmission? */ 1428c2ecf20Sopenharmony_ci bool is_ack_delayed; /* is this (likely) a delayed ACK? */ 1438c2ecf20Sopenharmony_ci} __attribute__((preserve_access_index)); 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci#define TCP_CA_NAME_MAX 16 1468c2ecf20Sopenharmony_ci#define TCP_CONG_NEEDS_ECN 0x2 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_cistruct tcp_congestion_ops { 1498c2ecf20Sopenharmony_ci char name[TCP_CA_NAME_MAX]; 1508c2ecf20Sopenharmony_ci __u32 flags; 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci /* initialize private data (optional) */ 1538c2ecf20Sopenharmony_ci void (*init)(struct sock *sk); 1548c2ecf20Sopenharmony_ci /* cleanup private data (optional) */ 1558c2ecf20Sopenharmony_ci void (*release)(struct sock *sk); 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci /* return slow start threshold (required) */ 1588c2ecf20Sopenharmony_ci __u32 (*ssthresh)(struct sock *sk); 1598c2ecf20Sopenharmony_ci /* do new cwnd calculation (required) */ 1608c2ecf20Sopenharmony_ci void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked); 1618c2ecf20Sopenharmony_ci /* call before changing ca_state (optional) */ 1628c2ecf20Sopenharmony_ci void (*set_state)(struct sock *sk, __u8 new_state); 1638c2ecf20Sopenharmony_ci /* call when cwnd event occurs (optional) */ 1648c2ecf20Sopenharmony_ci void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); 1658c2ecf20Sopenharmony_ci /* call when ack arrives (optional) */ 1668c2ecf20Sopenharmony_ci void (*in_ack_event)(struct sock *sk, __u32 flags); 1678c2ecf20Sopenharmony_ci /* new value of cwnd after loss (required) */ 1688c2ecf20Sopenharmony_ci __u32 (*undo_cwnd)(struct sock *sk); 1698c2ecf20Sopenharmony_ci /* hook for packet ack accounting (optional) */ 1708c2ecf20Sopenharmony_ci void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); 1718c2ecf20Sopenharmony_ci /* override sysctl_tcp_min_tso_segs */ 1728c2ecf20Sopenharmony_ci __u32 (*min_tso_segs)(struct sock *sk); 1738c2ecf20Sopenharmony_ci /* returns the multiplier used in tcp_sndbuf_expand (optional) */ 1748c2ecf20Sopenharmony_ci __u32 (*sndbuf_expand)(struct sock *sk); 1758c2ecf20Sopenharmony_ci /* call when packets are delivered to update cwnd and pacing rate, 1768c2ecf20Sopenharmony_ci * after all the ca_state processing. (optional) 1778c2ecf20Sopenharmony_ci */ 1788c2ecf20Sopenharmony_ci void (*cong_control)(struct sock *sk, const struct rate_sample *rs); 1798c2ecf20Sopenharmony_ci}; 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci#define min(a, b) ((a) < (b) ? (a) : (b)) 1828c2ecf20Sopenharmony_ci#define max(a, b) ((a) > (b) ? (a) : (b)) 1838c2ecf20Sopenharmony_ci#define min_not_zero(x, y) ({ \ 1848c2ecf20Sopenharmony_ci typeof(x) __x = (x); \ 1858c2ecf20Sopenharmony_ci typeof(y) __y = (y); \ 1868c2ecf20Sopenharmony_ci __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_cistatic __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) 1898c2ecf20Sopenharmony_ci{ 1908c2ecf20Sopenharmony_ci __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci acked -= cwnd - tp->snd_cwnd; 1938c2ecf20Sopenharmony_ci tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci return acked; 1968c2ecf20Sopenharmony_ci} 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_cistatic __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp) 1998c2ecf20Sopenharmony_ci{ 2008c2ecf20Sopenharmony_ci return tp->snd_cwnd < tp->snd_ssthresh; 2018c2ecf20Sopenharmony_ci} 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_cistatic __always_inline bool tcp_is_cwnd_limited(const struct sock *sk) 2048c2ecf20Sopenharmony_ci{ 2058c2ecf20Sopenharmony_ci const struct tcp_sock *tp = tcp_sk(sk); 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci /* If in slow start, ensure cwnd grows to twice what was ACKed. */ 2088c2ecf20Sopenharmony_ci if (tcp_in_slow_start(tp)) 2098c2ecf20Sopenharmony_ci return tp->snd_cwnd < 2 * tp->max_packets_out; 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); 2128c2ecf20Sopenharmony_ci} 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_cistatic __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) 2158c2ecf20Sopenharmony_ci{ 2168c2ecf20Sopenharmony_ci /* If credits accumulated at a higher w, apply them gently now. */ 2178c2ecf20Sopenharmony_ci if (tp->snd_cwnd_cnt >= w) { 2188c2ecf20Sopenharmony_ci tp->snd_cwnd_cnt = 0; 2198c2ecf20Sopenharmony_ci tp->snd_cwnd++; 2208c2ecf20Sopenharmony_ci } 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci tp->snd_cwnd_cnt += acked; 2238c2ecf20Sopenharmony_ci if (tp->snd_cwnd_cnt >= w) { 2248c2ecf20Sopenharmony_ci __u32 delta = tp->snd_cwnd_cnt / w; 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci tp->snd_cwnd_cnt -= delta * w; 2278c2ecf20Sopenharmony_ci tp->snd_cwnd += delta; 2288c2ecf20Sopenharmony_ci } 2298c2ecf20Sopenharmony_ci tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); 2308c2ecf20Sopenharmony_ci} 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_ci#endif 233