162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * TCP Illinois congestion control.
462306a36Sopenharmony_ci * Home page:
562306a36Sopenharmony_ci *	http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * The algorithm is described in:
862306a36Sopenharmony_ci * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
962306a36Sopenharmony_ci *  for High-Speed Networks"
1062306a36Sopenharmony_ci * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf
1162306a36Sopenharmony_ci *
1262306a36Sopenharmony_ci * Implemented from description in paper and ns-2 simulation.
1362306a36Sopenharmony_ci * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
1462306a36Sopenharmony_ci */
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci#include <linux/module.h>
1762306a36Sopenharmony_ci#include <linux/skbuff.h>
1862306a36Sopenharmony_ci#include <linux/inet_diag.h>
1962306a36Sopenharmony_ci#include <asm/div64.h>
2062306a36Sopenharmony_ci#include <net/tcp.h>
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#define ALPHA_SHIFT	7
2362306a36Sopenharmony_ci#define ALPHA_SCALE	(1u<<ALPHA_SHIFT)
2462306a36Sopenharmony_ci#define ALPHA_MIN	((3*ALPHA_SCALE)/10)	/* ~0.3 */
2562306a36Sopenharmony_ci#define ALPHA_MAX	(10*ALPHA_SCALE)	/* 10.0 */
2662306a36Sopenharmony_ci#define ALPHA_BASE	ALPHA_SCALE		/* 1.0 */
2762306a36Sopenharmony_ci#define RTT_MAX		(U32_MAX / ALPHA_MAX)	/* 3.3 secs */
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci#define BETA_SHIFT	6
3062306a36Sopenharmony_ci#define BETA_SCALE	(1u<<BETA_SHIFT)
3162306a36Sopenharmony_ci#define BETA_MIN	(BETA_SCALE/8)		/* 0.125 */
3262306a36Sopenharmony_ci#define BETA_MAX	(BETA_SCALE/2)		/* 0.5 */
3362306a36Sopenharmony_ci#define BETA_BASE	BETA_MAX
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_cistatic int win_thresh __read_mostly = 15;
3662306a36Sopenharmony_cimodule_param(win_thresh, int, 0);
3762306a36Sopenharmony_ciMODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_cistatic int theta __read_mostly = 5;
4062306a36Sopenharmony_cimodule_param(theta, int, 0);
4162306a36Sopenharmony_ciMODULE_PARM_DESC(theta, "# of fast RTT's before full growth");
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_ci/* TCP Illinois Parameters */
4462306a36Sopenharmony_cistruct illinois {
4562306a36Sopenharmony_ci	u64	sum_rtt;	/* sum of rtt's measured within last rtt */
4662306a36Sopenharmony_ci	u16	cnt_rtt;	/* # of rtts measured within last rtt */
4762306a36Sopenharmony_ci	u32	base_rtt;	/* min of all rtt in usec */
4862306a36Sopenharmony_ci	u32	max_rtt;	/* max of all rtt in usec */
4962306a36Sopenharmony_ci	u32	end_seq;	/* right edge of current RTT */
5062306a36Sopenharmony_ci	u32	alpha;		/* Additive increase */
5162306a36Sopenharmony_ci	u32	beta;		/* Muliplicative decrease */
5262306a36Sopenharmony_ci	u16	acked;		/* # packets acked by current ACK */
5362306a36Sopenharmony_ci	u8	rtt_above;	/* average rtt has gone above threshold */
5462306a36Sopenharmony_ci	u8	rtt_low;	/* # of rtts measurements below threshold */
5562306a36Sopenharmony_ci};
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_cistatic void rtt_reset(struct sock *sk)
5862306a36Sopenharmony_ci{
5962306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
6062306a36Sopenharmony_ci	struct illinois *ca = inet_csk_ca(sk);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	ca->end_seq = tp->snd_nxt;
6362306a36Sopenharmony_ci	ca->cnt_rtt = 0;
6462306a36Sopenharmony_ci	ca->sum_rtt = 0;
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	/* TODO: age max_rtt? */
6762306a36Sopenharmony_ci}
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_cistatic void tcp_illinois_init(struct sock *sk)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	struct illinois *ca = inet_csk_ca(sk);
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	ca->alpha = ALPHA_MAX;
7462306a36Sopenharmony_ci	ca->beta = BETA_BASE;
7562306a36Sopenharmony_ci	ca->base_rtt = 0x7fffffff;
7662306a36Sopenharmony_ci	ca->max_rtt = 0;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	ca->acked = 0;
7962306a36Sopenharmony_ci	ca->rtt_low = 0;
8062306a36Sopenharmony_ci	ca->rtt_above = 0;
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	rtt_reset(sk);
8362306a36Sopenharmony_ci}
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci/* Measure RTT for each ack. */
8662306a36Sopenharmony_cistatic void tcp_illinois_acked(struct sock *sk, const struct ack_sample *sample)
8762306a36Sopenharmony_ci{
8862306a36Sopenharmony_ci	struct illinois *ca = inet_csk_ca(sk);
8962306a36Sopenharmony_ci	s32 rtt_us = sample->rtt_us;
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	ca->acked = sample->pkts_acked;
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	/* dup ack, no rtt sample */
9462306a36Sopenharmony_ci	if (rtt_us < 0)
9562306a36Sopenharmony_ci		return;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	/* ignore bogus values, this prevents wraparound in alpha math */
9862306a36Sopenharmony_ci	if (rtt_us > RTT_MAX)
9962306a36Sopenharmony_ci		rtt_us = RTT_MAX;
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	/* keep track of minimum RTT seen so far */
10262306a36Sopenharmony_ci	if (ca->base_rtt > rtt_us)
10362306a36Sopenharmony_ci		ca->base_rtt = rtt_us;
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	/* and max */
10662306a36Sopenharmony_ci	if (ca->max_rtt < rtt_us)
10762306a36Sopenharmony_ci		ca->max_rtt = rtt_us;
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci	++ca->cnt_rtt;
11062306a36Sopenharmony_ci	ca->sum_rtt += rtt_us;
11162306a36Sopenharmony_ci}
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci/* Maximum queuing delay */
11462306a36Sopenharmony_cistatic inline u32 max_delay(const struct illinois *ca)
11562306a36Sopenharmony_ci{
11662306a36Sopenharmony_ci	return ca->max_rtt - ca->base_rtt;
11762306a36Sopenharmony_ci}
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci/* Average queuing delay */
12062306a36Sopenharmony_cistatic inline u32 avg_delay(const struct illinois *ca)
12162306a36Sopenharmony_ci{
12262306a36Sopenharmony_ci	u64 t = ca->sum_rtt;
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	do_div(t, ca->cnt_rtt);
12562306a36Sopenharmony_ci	return t - ca->base_rtt;
12662306a36Sopenharmony_ci}
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci/*
12962306a36Sopenharmony_ci * Compute value of alpha used for additive increase.
13062306a36Sopenharmony_ci * If small window then use 1.0, equivalent to Reno.
13162306a36Sopenharmony_ci *
13262306a36Sopenharmony_ci * For larger windows, adjust based on average delay.
13362306a36Sopenharmony_ci * A. If average delay is at minimum (we are uncongested),
13462306a36Sopenharmony_ci *    then use large alpha (10.0) to increase faster.
13562306a36Sopenharmony_ci * B. If average delay is at maximum (getting congested)
13662306a36Sopenharmony_ci *    then use small alpha (0.3)
13762306a36Sopenharmony_ci *
13862306a36Sopenharmony_ci * The result is a convex window growth curve.
13962306a36Sopenharmony_ci */
14062306a36Sopenharmony_cistatic u32 alpha(struct illinois *ca, u32 da, u32 dm)
14162306a36Sopenharmony_ci{
14262306a36Sopenharmony_ci	u32 d1 = dm / 100;	/* Low threshold */
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	if (da <= d1) {
14562306a36Sopenharmony_ci		/* If never got out of low delay zone, then use max */
14662306a36Sopenharmony_ci		if (!ca->rtt_above)
14762306a36Sopenharmony_ci			return ALPHA_MAX;
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci		/* Wait for 5 good RTT's before allowing alpha to go alpha max.
15062306a36Sopenharmony_ci		 * This prevents one good RTT from causing sudden window increase.
15162306a36Sopenharmony_ci		 */
15262306a36Sopenharmony_ci		if (++ca->rtt_low < theta)
15362306a36Sopenharmony_ci			return ca->alpha;
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci		ca->rtt_low = 0;
15662306a36Sopenharmony_ci		ca->rtt_above = 0;
15762306a36Sopenharmony_ci		return ALPHA_MAX;
15862306a36Sopenharmony_ci	}
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	ca->rtt_above = 1;
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	/*
16362306a36Sopenharmony_ci	 * Based on:
16462306a36Sopenharmony_ci	 *
16562306a36Sopenharmony_ci	 *      (dm - d1) amin amax
16662306a36Sopenharmony_ci	 * k1 = -------------------
16762306a36Sopenharmony_ci	 *         amax - amin
16862306a36Sopenharmony_ci	 *
16962306a36Sopenharmony_ci	 *       (dm - d1) amin
17062306a36Sopenharmony_ci	 * k2 = ----------------  - d1
17162306a36Sopenharmony_ci	 *        amax - amin
17262306a36Sopenharmony_ci	 *
17362306a36Sopenharmony_ci	 *             k1
17462306a36Sopenharmony_ci	 * alpha = ----------
17562306a36Sopenharmony_ci	 *          k2 + da
17662306a36Sopenharmony_ci	 */
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	dm -= d1;
17962306a36Sopenharmony_ci	da -= d1;
18062306a36Sopenharmony_ci	return (dm * ALPHA_MAX) /
18162306a36Sopenharmony_ci		(dm + (da  * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
18262306a36Sopenharmony_ci}
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci/*
18562306a36Sopenharmony_ci * Beta used for multiplicative decrease.
18662306a36Sopenharmony_ci * For small window sizes returns same value as Reno (0.5)
18762306a36Sopenharmony_ci *
18862306a36Sopenharmony_ci * If delay is small (10% of max) then beta = 1/8
18962306a36Sopenharmony_ci * If delay is up to 80% of max then beta = 1/2
19062306a36Sopenharmony_ci * In between is a linear function
19162306a36Sopenharmony_ci */
19262306a36Sopenharmony_cistatic u32 beta(u32 da, u32 dm)
19362306a36Sopenharmony_ci{
19462306a36Sopenharmony_ci	u32 d2, d3;
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	d2 = dm / 10;
19762306a36Sopenharmony_ci	if (da <= d2)
19862306a36Sopenharmony_ci		return BETA_MIN;
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	d3 = (8 * dm) / 10;
20162306a36Sopenharmony_ci	if (da >= d3 || d3 <= d2)
20262306a36Sopenharmony_ci		return BETA_MAX;
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	/*
20562306a36Sopenharmony_ci	 * Based on:
20662306a36Sopenharmony_ci	 *
20762306a36Sopenharmony_ci	 *       bmin d3 - bmax d2
20862306a36Sopenharmony_ci	 * k3 = -------------------
20962306a36Sopenharmony_ci	 *         d3 - d2
21062306a36Sopenharmony_ci	 *
21162306a36Sopenharmony_ci	 *       bmax - bmin
21262306a36Sopenharmony_ci	 * k4 = -------------
21362306a36Sopenharmony_ci	 *         d3 - d2
21462306a36Sopenharmony_ci	 *
21562306a36Sopenharmony_ci	 * b = k3 + k4 da
21662306a36Sopenharmony_ci	 */
21762306a36Sopenharmony_ci	return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da)
21862306a36Sopenharmony_ci		/ (d3 - d2);
21962306a36Sopenharmony_ci}
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci/* Update alpha and beta values once per RTT */
22262306a36Sopenharmony_cistatic void update_params(struct sock *sk)
22362306a36Sopenharmony_ci{
22462306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
22562306a36Sopenharmony_ci	struct illinois *ca = inet_csk_ca(sk);
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	if (tcp_snd_cwnd(tp) < win_thresh) {
22862306a36Sopenharmony_ci		ca->alpha = ALPHA_BASE;
22962306a36Sopenharmony_ci		ca->beta = BETA_BASE;
23062306a36Sopenharmony_ci	} else if (ca->cnt_rtt > 0) {
23162306a36Sopenharmony_ci		u32 dm = max_delay(ca);
23262306a36Sopenharmony_ci		u32 da = avg_delay(ca);
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci		ca->alpha = alpha(ca, da, dm);
23562306a36Sopenharmony_ci		ca->beta = beta(da, dm);
23662306a36Sopenharmony_ci	}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	rtt_reset(sk);
23962306a36Sopenharmony_ci}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci/*
24262306a36Sopenharmony_ci * In case of loss, reset to default values
24362306a36Sopenharmony_ci */
24462306a36Sopenharmony_cistatic void tcp_illinois_state(struct sock *sk, u8 new_state)
24562306a36Sopenharmony_ci{
24662306a36Sopenharmony_ci	struct illinois *ca = inet_csk_ca(sk);
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	if (new_state == TCP_CA_Loss) {
24962306a36Sopenharmony_ci		ca->alpha = ALPHA_BASE;
25062306a36Sopenharmony_ci		ca->beta = BETA_BASE;
25162306a36Sopenharmony_ci		ca->rtt_low = 0;
25262306a36Sopenharmony_ci		ca->rtt_above = 0;
25362306a36Sopenharmony_ci		rtt_reset(sk);
25462306a36Sopenharmony_ci	}
25562306a36Sopenharmony_ci}
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci/*
25862306a36Sopenharmony_ci * Increase window in response to successful acknowledgment.
25962306a36Sopenharmony_ci */
26062306a36Sopenharmony_cistatic void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
26162306a36Sopenharmony_ci{
26262306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
26362306a36Sopenharmony_ci	struct illinois *ca = inet_csk_ca(sk);
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	if (after(ack, ca->end_seq))
26662306a36Sopenharmony_ci		update_params(sk);
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	/* RFC2861 only increase cwnd if fully utilized */
26962306a36Sopenharmony_ci	if (!tcp_is_cwnd_limited(sk))
27062306a36Sopenharmony_ci		return;
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	/* In slow start */
27362306a36Sopenharmony_ci	if (tcp_in_slow_start(tp))
27462306a36Sopenharmony_ci		tcp_slow_start(tp, acked);
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci	else {
27762306a36Sopenharmony_ci		u32 delta;
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci		/* snd_cwnd_cnt is # of packets since last cwnd increment */
28062306a36Sopenharmony_ci		tp->snd_cwnd_cnt += ca->acked;
28162306a36Sopenharmony_ci		ca->acked = 1;
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci		/* This is close approximation of:
28462306a36Sopenharmony_ci		 * tp->snd_cwnd += alpha/tp->snd_cwnd
28562306a36Sopenharmony_ci		*/
28662306a36Sopenharmony_ci		delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
28762306a36Sopenharmony_ci		if (delta >= tcp_snd_cwnd(tp)) {
28862306a36Sopenharmony_ci			tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp) + delta / tcp_snd_cwnd(tp),
28962306a36Sopenharmony_ci						 (u32)tp->snd_cwnd_clamp));
29062306a36Sopenharmony_ci			tp->snd_cwnd_cnt = 0;
29162306a36Sopenharmony_ci		}
29262306a36Sopenharmony_ci	}
29362306a36Sopenharmony_ci}
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_cistatic u32 tcp_illinois_ssthresh(struct sock *sk)
29662306a36Sopenharmony_ci{
29762306a36Sopenharmony_ci	struct tcp_sock *tp = tcp_sk(sk);
29862306a36Sopenharmony_ci	struct illinois *ca = inet_csk_ca(sk);
29962306a36Sopenharmony_ci	u32 decr;
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	/* Multiplicative decrease */
30262306a36Sopenharmony_ci	decr = (tcp_snd_cwnd(tp) * ca->beta) >> BETA_SHIFT;
30362306a36Sopenharmony_ci	return max(tcp_snd_cwnd(tp) - decr, 2U);
30462306a36Sopenharmony_ci}
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci/* Extract info for Tcp socket info provided via netlink. */
30762306a36Sopenharmony_cistatic size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
30862306a36Sopenharmony_ci				union tcp_cc_info *info)
30962306a36Sopenharmony_ci{
31062306a36Sopenharmony_ci	const struct illinois *ca = inet_csk_ca(sk);
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
31362306a36Sopenharmony_ci		info->vegas.tcpv_enabled = 1;
31462306a36Sopenharmony_ci		info->vegas.tcpv_rttcnt = ca->cnt_rtt;
31562306a36Sopenharmony_ci		info->vegas.tcpv_minrtt = ca->base_rtt;
31662306a36Sopenharmony_ci		info->vegas.tcpv_rtt = 0;
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci		if (info->vegas.tcpv_rttcnt > 0) {
31962306a36Sopenharmony_ci			u64 t = ca->sum_rtt;
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci			do_div(t, info->vegas.tcpv_rttcnt);
32262306a36Sopenharmony_ci			info->vegas.tcpv_rtt = t;
32362306a36Sopenharmony_ci		}
32462306a36Sopenharmony_ci		*attr = INET_DIAG_VEGASINFO;
32562306a36Sopenharmony_ci		return sizeof(struct tcpvegas_info);
32662306a36Sopenharmony_ci	}
32762306a36Sopenharmony_ci	return 0;
32862306a36Sopenharmony_ci}
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_cistatic struct tcp_congestion_ops tcp_illinois __read_mostly = {
33162306a36Sopenharmony_ci	.init		= tcp_illinois_init,
33262306a36Sopenharmony_ci	.ssthresh	= tcp_illinois_ssthresh,
33362306a36Sopenharmony_ci	.undo_cwnd	= tcp_reno_undo_cwnd,
33462306a36Sopenharmony_ci	.cong_avoid	= tcp_illinois_cong_avoid,
33562306a36Sopenharmony_ci	.set_state	= tcp_illinois_state,
33662306a36Sopenharmony_ci	.get_info	= tcp_illinois_info,
33762306a36Sopenharmony_ci	.pkts_acked	= tcp_illinois_acked,
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci	.owner		= THIS_MODULE,
34062306a36Sopenharmony_ci	.name		= "illinois",
34162306a36Sopenharmony_ci};
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_cistatic int __init tcp_illinois_register(void)
34462306a36Sopenharmony_ci{
34562306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
34662306a36Sopenharmony_ci	return tcp_register_congestion_control(&tcp_illinois);
34762306a36Sopenharmony_ci}
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_cistatic void __exit tcp_illinois_unregister(void)
35062306a36Sopenharmony_ci{
35162306a36Sopenharmony_ci	tcp_unregister_congestion_control(&tcp_illinois);
35262306a36Sopenharmony_ci}
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_cimodule_init(tcp_illinois_register);
35562306a36Sopenharmony_cimodule_exit(tcp_illinois_unregister);
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ciMODULE_AUTHOR("Stephen Hemminger, Shao Liu");
35862306a36Sopenharmony_ciMODULE_LICENSE("GPL");
35962306a36Sopenharmony_ciMODULE_DESCRIPTION("TCP Illinois");
36062306a36Sopenharmony_ciMODULE_VERSION("1.0");
361