// SPDX-License-Identifier: GPL-2.0-only
/*
 * TCP Low Priority (TCP-LP)
 *
 * TCP Low Priority is a distributed algorithm whose goal is to utilize only
 *   the excess network bandwidth as compared to the ``fair share`` of
 *   bandwidth as targeted by TCP.
 *
 * As of 2.6.13, Linux supports pluggable congestion control algorithms.
 * Due to the limitation of the API, we make the following changes from
 * the original TCP-LP implementation:
 *   o We use newReno in most core CA handling. Only add some checking
 *     within cong_avoid.
 *   o Error correcting in remote HZ, therefore remote HZ is continually
 *     checked and updated.
 *   o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since
 *     OWD has a similar meaning to RTT. Also correct the buggy formula.
 *   o Handle reaction for Early Congestion Indication (ECI) within
 *     pkts_acked, as mentioned within pseudo code.
 *   o OWD is handled in relative format, where local time stamp will be in
 *     tcp_time_stamp format.
 *
 * Original Author:
 *   Aleksandar Kuzmanovic <akuzma@northwestern.edu>
 * Available from:
 *   http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf
 * Original implementation for 2.4.19:
 *   http://www-ece.rice.edu/networks/TCP-LP/
 *
 * 2.6.x module Authors:
 *   Wong Hoi Sing, Edison <hswong3i@gmail.com>
 *   Hung Hing Lun, Mike <hlhung3i@gmail.com>
 * SourceForge project page:
 *   http://tcp-lp-mod.sourceforge.net/
 */

#include <linux/module.h>
#include <net/tcp.h>

/* resolution of owd */
#define LP_RESOL       TCP_TS_HZ

/**
 * enum tcp_lp_state
 * @LP_VALID_RHZ: is remote HZ valid?
 * @LP_VALID_OWD: is OWD valid?
 * @LP_WITHIN_THR: are we within threshold?
 * @LP_WITHIN_INF: are we within inference?
 *
 * TCP-LP's state flags.
 * We create this set of state flags mainly for debugging.
5262306a36Sopenharmony_ci */ 5362306a36Sopenharmony_cienum tcp_lp_state { 5462306a36Sopenharmony_ci LP_VALID_RHZ = (1 << 0), 5562306a36Sopenharmony_ci LP_VALID_OWD = (1 << 1), 5662306a36Sopenharmony_ci LP_WITHIN_THR = (1 << 3), 5762306a36Sopenharmony_ci LP_WITHIN_INF = (1 << 4), 5862306a36Sopenharmony_ci}; 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci/** 6162306a36Sopenharmony_ci * struct lp 6262306a36Sopenharmony_ci * @flag: TCP-LP state flag 6362306a36Sopenharmony_ci * @sowd: smoothed OWD << 3 6462306a36Sopenharmony_ci * @owd_min: min OWD 6562306a36Sopenharmony_ci * @owd_max: max OWD 6662306a36Sopenharmony_ci * @owd_max_rsv: reserved max owd 6762306a36Sopenharmony_ci * @remote_hz: estimated remote HZ 6862306a36Sopenharmony_ci * @remote_ref_time: remote reference time 6962306a36Sopenharmony_ci * @local_ref_time: local reference time 7062306a36Sopenharmony_ci * @last_drop: time for last active drop 7162306a36Sopenharmony_ci * @inference: current inference 7262306a36Sopenharmony_ci * 7362306a36Sopenharmony_ci * TCP-LP's private struct. 7462306a36Sopenharmony_ci * We get the idea from original TCP-LP implementation where only left those we 7562306a36Sopenharmony_ci * found are really useful. 7662306a36Sopenharmony_ci */ 7762306a36Sopenharmony_cistruct lp { 7862306a36Sopenharmony_ci u32 flag; 7962306a36Sopenharmony_ci u32 sowd; 8062306a36Sopenharmony_ci u32 owd_min; 8162306a36Sopenharmony_ci u32 owd_max; 8262306a36Sopenharmony_ci u32 owd_max_rsv; 8362306a36Sopenharmony_ci u32 remote_hz; 8462306a36Sopenharmony_ci u32 remote_ref_time; 8562306a36Sopenharmony_ci u32 local_ref_time; 8662306a36Sopenharmony_ci u32 last_drop; 8762306a36Sopenharmony_ci u32 inference; 8862306a36Sopenharmony_ci}; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci/** 9162306a36Sopenharmony_ci * tcp_lp_init 9262306a36Sopenharmony_ci * @sk: socket to initialize congestion control algorithm for 9362306a36Sopenharmony_ci * 9462306a36Sopenharmony_ci * Init all required variables. 
9562306a36Sopenharmony_ci * Clone the handling from Vegas module implementation. 9662306a36Sopenharmony_ci */ 9762306a36Sopenharmony_cistatic void tcp_lp_init(struct sock *sk) 9862306a36Sopenharmony_ci{ 9962306a36Sopenharmony_ci struct lp *lp = inet_csk_ca(sk); 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci lp->flag = 0; 10262306a36Sopenharmony_ci lp->sowd = 0; 10362306a36Sopenharmony_ci lp->owd_min = 0xffffffff; 10462306a36Sopenharmony_ci lp->owd_max = 0; 10562306a36Sopenharmony_ci lp->owd_max_rsv = 0; 10662306a36Sopenharmony_ci lp->remote_hz = 0; 10762306a36Sopenharmony_ci lp->remote_ref_time = 0; 10862306a36Sopenharmony_ci lp->local_ref_time = 0; 10962306a36Sopenharmony_ci lp->last_drop = 0; 11062306a36Sopenharmony_ci lp->inference = 0; 11162306a36Sopenharmony_ci} 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci/** 11462306a36Sopenharmony_ci * tcp_lp_cong_avoid 11562306a36Sopenharmony_ci * @sk: socket to avoid congesting 11662306a36Sopenharmony_ci * 11762306a36Sopenharmony_ci * Implementation of cong_avoid. 11862306a36Sopenharmony_ci * Will only call newReno CA when away from inference. 11962306a36Sopenharmony_ci * From TCP-LP's paper, this will be handled in additive increasement. 12062306a36Sopenharmony_ci */ 12162306a36Sopenharmony_cistatic void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked) 12262306a36Sopenharmony_ci{ 12362306a36Sopenharmony_ci struct lp *lp = inet_csk_ca(sk); 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci if (!(lp->flag & LP_WITHIN_INF)) 12662306a36Sopenharmony_ci tcp_reno_cong_avoid(sk, ack, acked); 12762306a36Sopenharmony_ci} 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci/** 13062306a36Sopenharmony_ci * tcp_lp_remote_hz_estimator 13162306a36Sopenharmony_ci * @sk: socket which needs an estimate for the remote HZs 13262306a36Sopenharmony_ci * 13362306a36Sopenharmony_ci * Estimate remote HZ. 
13462306a36Sopenharmony_ci * We keep on updating the estimated value, where original TCP-LP 13562306a36Sopenharmony_ci * implementation only guest it for once and use forever. 13662306a36Sopenharmony_ci */ 13762306a36Sopenharmony_cistatic u32 tcp_lp_remote_hz_estimator(struct sock *sk) 13862306a36Sopenharmony_ci{ 13962306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 14062306a36Sopenharmony_ci struct lp *lp = inet_csk_ca(sk); 14162306a36Sopenharmony_ci s64 rhz = lp->remote_hz << 6; /* remote HZ << 6 */ 14262306a36Sopenharmony_ci s64 m = 0; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci /* not yet record reference time 14562306a36Sopenharmony_ci * go away!! record it before come back!! */ 14662306a36Sopenharmony_ci if (lp->remote_ref_time == 0 || lp->local_ref_time == 0) 14762306a36Sopenharmony_ci goto out; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci /* we can't calc remote HZ with no different!! */ 15062306a36Sopenharmony_ci if (tp->rx_opt.rcv_tsval == lp->remote_ref_time || 15162306a36Sopenharmony_ci tp->rx_opt.rcv_tsecr == lp->local_ref_time) 15262306a36Sopenharmony_ci goto out; 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci m = TCP_TS_HZ * 15562306a36Sopenharmony_ci (tp->rx_opt.rcv_tsval - lp->remote_ref_time) / 15662306a36Sopenharmony_ci (tp->rx_opt.rcv_tsecr - lp->local_ref_time); 15762306a36Sopenharmony_ci if (m < 0) 15862306a36Sopenharmony_ci m = -m; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci if (rhz > 0) { 16162306a36Sopenharmony_ci m -= rhz >> 6; /* m is now error in remote HZ est */ 16262306a36Sopenharmony_ci rhz += m; /* 63/64 old + 1/64 new */ 16362306a36Sopenharmony_ci } else 16462306a36Sopenharmony_ci rhz = m << 6; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci out: 16762306a36Sopenharmony_ci /* record time for successful remote HZ calc */ 16862306a36Sopenharmony_ci if ((rhz >> 6) > 0) 16962306a36Sopenharmony_ci lp->flag |= LP_VALID_RHZ; 17062306a36Sopenharmony_ci else 17162306a36Sopenharmony_ci lp->flag 
&= ~LP_VALID_RHZ; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci /* record reference time stamp */ 17462306a36Sopenharmony_ci lp->remote_ref_time = tp->rx_opt.rcv_tsval; 17562306a36Sopenharmony_ci lp->local_ref_time = tp->rx_opt.rcv_tsecr; 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci return rhz >> 6; 17862306a36Sopenharmony_ci} 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci/** 18162306a36Sopenharmony_ci * tcp_lp_owd_calculator 18262306a36Sopenharmony_ci * @sk: socket to calculate one way delay for 18362306a36Sopenharmony_ci * 18462306a36Sopenharmony_ci * Calculate one way delay (in relative format). 18562306a36Sopenharmony_ci * Original implement OWD as minus of remote time difference to local time 18662306a36Sopenharmony_ci * difference directly. As this time difference just simply equal to RTT, when 18762306a36Sopenharmony_ci * the network status is stable, remote RTT will equal to local RTT, and result 18862306a36Sopenharmony_ci * OWD into zero. 18962306a36Sopenharmony_ci * It seems to be a bug and so we fixed it. 
19062306a36Sopenharmony_ci */ 19162306a36Sopenharmony_cistatic u32 tcp_lp_owd_calculator(struct sock *sk) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 19462306a36Sopenharmony_ci struct lp *lp = inet_csk_ca(sk); 19562306a36Sopenharmony_ci s64 owd = 0; 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci lp->remote_hz = tcp_lp_remote_hz_estimator(sk); 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci if (lp->flag & LP_VALID_RHZ) { 20062306a36Sopenharmony_ci owd = 20162306a36Sopenharmony_ci tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) - 20262306a36Sopenharmony_ci tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ); 20362306a36Sopenharmony_ci if (owd < 0) 20462306a36Sopenharmony_ci owd = -owd; 20562306a36Sopenharmony_ci } 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci if (owd > 0) 20862306a36Sopenharmony_ci lp->flag |= LP_VALID_OWD; 20962306a36Sopenharmony_ci else 21062306a36Sopenharmony_ci lp->flag &= ~LP_VALID_OWD; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci return owd; 21362306a36Sopenharmony_ci} 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci/** 21662306a36Sopenharmony_ci * tcp_lp_rtt_sample 21762306a36Sopenharmony_ci * @sk: socket to add a rtt sample to 21862306a36Sopenharmony_ci * @rtt: round trip time, which is ignored! 21962306a36Sopenharmony_ci * 22062306a36Sopenharmony_ci * Implementation or rtt_sample. 22162306a36Sopenharmony_ci * Will take the following action, 22262306a36Sopenharmony_ci * 1. calc OWD, 22362306a36Sopenharmony_ci * 2. record the min/max OWD, 22462306a36Sopenharmony_ci * 3. calc smoothed OWD (SOWD). 22562306a36Sopenharmony_ci * Most ideas come from the original TCP-LP implementation. 
22662306a36Sopenharmony_ci */ 22762306a36Sopenharmony_cistatic void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) 22862306a36Sopenharmony_ci{ 22962306a36Sopenharmony_ci struct lp *lp = inet_csk_ca(sk); 23062306a36Sopenharmony_ci s64 mowd = tcp_lp_owd_calculator(sk); 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci /* sorry that we don't have valid data */ 23362306a36Sopenharmony_ci if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD)) 23462306a36Sopenharmony_ci return; 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci /* record the next min owd */ 23762306a36Sopenharmony_ci if (mowd < lp->owd_min) 23862306a36Sopenharmony_ci lp->owd_min = mowd; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci /* always forget the max of the max 24162306a36Sopenharmony_ci * we just set owd_max as one below it */ 24262306a36Sopenharmony_ci if (mowd > lp->owd_max) { 24362306a36Sopenharmony_ci if (mowd > lp->owd_max_rsv) { 24462306a36Sopenharmony_ci if (lp->owd_max_rsv == 0) 24562306a36Sopenharmony_ci lp->owd_max = mowd; 24662306a36Sopenharmony_ci else 24762306a36Sopenharmony_ci lp->owd_max = lp->owd_max_rsv; 24862306a36Sopenharmony_ci lp->owd_max_rsv = mowd; 24962306a36Sopenharmony_ci } else 25062306a36Sopenharmony_ci lp->owd_max = mowd; 25162306a36Sopenharmony_ci } 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci /* calc for smoothed owd */ 25462306a36Sopenharmony_ci if (lp->sowd != 0) { 25562306a36Sopenharmony_ci mowd -= lp->sowd >> 3; /* m is now error in owd est */ 25662306a36Sopenharmony_ci lp->sowd += mowd; /* owd = 7/8 owd + 1/8 new */ 25762306a36Sopenharmony_ci } else 25862306a36Sopenharmony_ci lp->sowd = mowd << 3; /* take the measured time be owd */ 25962306a36Sopenharmony_ci} 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci/** 26262306a36Sopenharmony_ci * tcp_lp_pkts_acked 26362306a36Sopenharmony_ci * @sk: socket requiring congestion avoidance calculations 26462306a36Sopenharmony_ci * 26562306a36Sopenharmony_ci * Implementation of pkts_acked. 
26662306a36Sopenharmony_ci * Deal with active drop under Early Congestion Indication. 26762306a36Sopenharmony_ci * Only drop to half and 1 will be handle, because we hope to use back 26862306a36Sopenharmony_ci * newReno in increase case. 26962306a36Sopenharmony_ci * We work it out by following the idea from TCP-LP's paper directly 27062306a36Sopenharmony_ci */ 27162306a36Sopenharmony_cistatic void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) 27262306a36Sopenharmony_ci{ 27362306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 27462306a36Sopenharmony_ci struct lp *lp = inet_csk_ca(sk); 27562306a36Sopenharmony_ci u32 now = tcp_time_stamp(tp); 27662306a36Sopenharmony_ci u32 delta; 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci if (sample->rtt_us > 0) 27962306a36Sopenharmony_ci tcp_lp_rtt_sample(sk, sample->rtt_us); 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci /* calc inference */ 28262306a36Sopenharmony_ci delta = now - tp->rx_opt.rcv_tsecr; 28362306a36Sopenharmony_ci if ((s32)delta > 0) 28462306a36Sopenharmony_ci lp->inference = 3 * delta; 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci /* test if within inference */ 28762306a36Sopenharmony_ci if (lp->last_drop && (now - lp->last_drop < lp->inference)) 28862306a36Sopenharmony_ci lp->flag |= LP_WITHIN_INF; 28962306a36Sopenharmony_ci else 29062306a36Sopenharmony_ci lp->flag &= ~LP_WITHIN_INF; 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci /* test if within threshold */ 29362306a36Sopenharmony_ci if (lp->sowd >> 3 < 29462306a36Sopenharmony_ci lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100) 29562306a36Sopenharmony_ci lp->flag |= LP_WITHIN_THR; 29662306a36Sopenharmony_ci else 29762306a36Sopenharmony_ci lp->flag &= ~LP_WITHIN_THR; 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag, 30062306a36Sopenharmony_ci tcp_snd_cwnd(tp), lp->remote_hz, lp->owd_min, lp->owd_max, 30162306a36Sopenharmony_ci lp->sowd 
>> 3); 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci if (lp->flag & LP_WITHIN_THR) 30462306a36Sopenharmony_ci return; 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci /* FIXME: try to reset owd_min and owd_max here 30762306a36Sopenharmony_ci * so decrease the chance the min/max is no longer suitable 30862306a36Sopenharmony_ci * and will usually within threshold when within inference */ 30962306a36Sopenharmony_ci lp->owd_min = lp->sowd >> 3; 31062306a36Sopenharmony_ci lp->owd_max = lp->sowd >> 2; 31162306a36Sopenharmony_ci lp->owd_max_rsv = lp->sowd >> 2; 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci /* happened within inference 31462306a36Sopenharmony_ci * drop snd_cwnd into 1 */ 31562306a36Sopenharmony_ci if (lp->flag & LP_WITHIN_INF) 31662306a36Sopenharmony_ci tcp_snd_cwnd_set(tp, 1U); 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci /* happened after inference 31962306a36Sopenharmony_ci * cut snd_cwnd into half */ 32062306a36Sopenharmony_ci else 32162306a36Sopenharmony_ci tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1U, 1U)); 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci /* record this drop time */ 32462306a36Sopenharmony_ci lp->last_drop = now; 32562306a36Sopenharmony_ci} 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_cistatic struct tcp_congestion_ops tcp_lp __read_mostly = { 32862306a36Sopenharmony_ci .init = tcp_lp_init, 32962306a36Sopenharmony_ci .ssthresh = tcp_reno_ssthresh, 33062306a36Sopenharmony_ci .undo_cwnd = tcp_reno_undo_cwnd, 33162306a36Sopenharmony_ci .cong_avoid = tcp_lp_cong_avoid, 33262306a36Sopenharmony_ci .pkts_acked = tcp_lp_pkts_acked, 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci .owner = THIS_MODULE, 33562306a36Sopenharmony_ci .name = "lp" 33662306a36Sopenharmony_ci}; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_cistatic int __init tcp_lp_register(void) 33962306a36Sopenharmony_ci{ 34062306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); 34162306a36Sopenharmony_ci 
return tcp_register_congestion_control(&tcp_lp); 34262306a36Sopenharmony_ci} 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_cistatic void __exit tcp_lp_unregister(void) 34562306a36Sopenharmony_ci{ 34662306a36Sopenharmony_ci tcp_unregister_congestion_control(&tcp_lp); 34762306a36Sopenharmony_ci} 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_cimodule_init(tcp_lp_register); 35062306a36Sopenharmony_cimodule_exit(tcp_lp_unregister); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ciMODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike"); 35362306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 35462306a36Sopenharmony_ciMODULE_DESCRIPTION("TCP Low Priority"); 355