162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * net/sched/sch_tbf.c	Token Bucket Filter queue.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
662306a36Sopenharmony_ci *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
762306a36Sopenharmony_ci *						 original idea by Martin Devera
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/module.h>
1162306a36Sopenharmony_ci#include <linux/types.h>
1262306a36Sopenharmony_ci#include <linux/kernel.h>
1362306a36Sopenharmony_ci#include <linux/string.h>
1462306a36Sopenharmony_ci#include <linux/errno.h>
1562306a36Sopenharmony_ci#include <linux/skbuff.h>
1662306a36Sopenharmony_ci#include <net/gso.h>
1762306a36Sopenharmony_ci#include <net/netlink.h>
1862306a36Sopenharmony_ci#include <net/sch_generic.h>
1962306a36Sopenharmony_ci#include <net/pkt_cls.h>
2062306a36Sopenharmony_ci#include <net/pkt_sched.h>
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci/*	Simple Token Bucket Filter.
2462306a36Sopenharmony_ci	=======================================
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci	SOURCE.
2762306a36Sopenharmony_ci	-------
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci	None.
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci	Description.
3262306a36Sopenharmony_ci	------------
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci	A data flow obeys TBF with rate R and depth B, if for any
3562306a36Sopenharmony_ci	time interval t_i...t_f the number of transmitted bits
3662306a36Sopenharmony_ci	does not exceed B + R*(t_f-t_i).
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci	Packetized version of this definition:
3962306a36Sopenharmony_ci	The sequence of packets of sizes s_i served at moments t_i
4062306a36Sopenharmony_ci	obeys TBF, if for any i<=k:
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci	s_i+....+s_k <= B + R*(t_k - t_i)
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci	Algorithm.
4562306a36Sopenharmony_ci	----------
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci	N(t+delta) = min{B/R, N(t) + delta}
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	If the first packet in queue has length S, it may be
5262306a36Sopenharmony_ci	transmitted only at the time t_* when S/R <= N(t_*),
5362306a36Sopenharmony_ci	and in this case N(t) jumps:
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci	N(t_* + 0) = N(t_* - 0) - S/R.
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci	Actually, QoS requires two TBF to be applied to a data stream.
6062306a36Sopenharmony_ci	One of them controls steady state burst size, another
6162306a36Sopenharmony_ci	one with rate P (peak rate) and depth M (equal to link MTU)
6262306a36Sopenharmony_ci	limits bursts at a smaller time scale.
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci	It is easy to see that P>R, and B>M. If P is infinity, this double
6562306a36Sopenharmony_ci	TBF is equivalent to a single one.
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	When TBF works in reshaping mode, latency is estimated as:
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	lat = max ((L-B)/R, (L-M)/P)
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci	NOTES.
7362306a36Sopenharmony_ci	------
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	If TBF throttles, it starts a watchdog timer, which will wake it up
7662306a36Sopenharmony_ci	when it is ready to transmit.
7762306a36Sopenharmony_ci	Note that the minimal timer resolution is 1/HZ.
7862306a36Sopenharmony_ci	If no new packets arrive during this period,
	or if the device is not awakened by EOI for some previous packet,
8062306a36Sopenharmony_ci	TBF can stop its activity for 1/HZ.
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci
	This means that, with depth B, the maximal rate is
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	R_crit = B*HZ
8662306a36Sopenharmony_ci
	E.g. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	Note that the peak rate TBF is much more tough: with MTU 1500
9062306a36Sopenharmony_ci	P_crit = 150Kbytes/sec. So, if you need greater peak
9162306a36Sopenharmony_ci	rates, use alpha with HZ=1000 :-)
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	With classful TBF, limit is just kept for backwards compatibility.
9462306a36Sopenharmony_ci	It is passed to the default bfifo qdisc - if the inner qdisc is
9562306a36Sopenharmony_ci	changed the limit is not effective anymore.
9662306a36Sopenharmony_ci*/
9762306a36Sopenharmony_ci
/* Private per-qdisc state of a TBF instance (returned by qdisc_priv()).
 *
 * The "Parameters" part is written by tbf_change(); the "Variables" part
 * is the running token-bucket state updated by tbf_dequeue() and
 * re-armed by tbf_reset()/tbf_change().
 */
struct tbf_sched_data {
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		max_size;	/* Largest packet length accepted as-is by
					 * tbf_enqueue(); bigger ones are segmented
					 * (GSO) or dropped
					 */
	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B.
					 * Stored in nanoseconds; caps 'tokens'.
					 */
	s64		mtu;		/* Peak bucket depth in nanoseconds;
					 * caps 'ptokens'
					 */
	struct psched_ratecfg rate;	/* Configured rate */
	struct psched_ratecfg peak;	/* Peak rate; rate_bytes_ps == 0 means
					 * no peak rate is configured
					 */

/* Variables */
	s64	tokens;			/* Current number of B tokens */
	s64	ptokens;		/* Current number of P tokens */
	s64	t_c;			/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci/* Time to Length, convert time in ns to length in bytes
11762306a36Sopenharmony_ci * to determinate how many bytes can be sent in given time.
11862306a36Sopenharmony_ci */
11962306a36Sopenharmony_cistatic u64 psched_ns_t2l(const struct psched_ratecfg *r,
12062306a36Sopenharmony_ci			 u64 time_in_ns)
12162306a36Sopenharmony_ci{
12262306a36Sopenharmony_ci	/* The formula is :
12362306a36Sopenharmony_ci	 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
12462306a36Sopenharmony_ci	 */
12562306a36Sopenharmony_ci	u64 len = time_in_ns * r->rate_bytes_ps;
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	do_div(len, NSEC_PER_SEC);
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
13062306a36Sopenharmony_ci		do_div(len, 53);
13162306a36Sopenharmony_ci		len = len * 48;
13262306a36Sopenharmony_ci	}
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	if (len > r->overhead)
13562306a36Sopenharmony_ci		len -= r->overhead;
13662306a36Sopenharmony_ci	else
13762306a36Sopenharmony_ci		len = 0;
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	return len;
14062306a36Sopenharmony_ci}
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_cistatic void tbf_offload_change(struct Qdisc *sch)
14362306a36Sopenharmony_ci{
14462306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
14562306a36Sopenharmony_ci	struct net_device *dev = qdisc_dev(sch);
14662306a36Sopenharmony_ci	struct tc_tbf_qopt_offload qopt;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
14962306a36Sopenharmony_ci		return;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	qopt.command = TC_TBF_REPLACE;
15262306a36Sopenharmony_ci	qopt.handle = sch->handle;
15362306a36Sopenharmony_ci	qopt.parent = sch->parent;
15462306a36Sopenharmony_ci	qopt.replace_params.rate = q->rate;
15562306a36Sopenharmony_ci	qopt.replace_params.max_size = q->max_size;
15662306a36Sopenharmony_ci	qopt.replace_params.qstats = &sch->qstats;
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_cistatic void tbf_offload_destroy(struct Qdisc *sch)
16262306a36Sopenharmony_ci{
16362306a36Sopenharmony_ci	struct net_device *dev = qdisc_dev(sch);
16462306a36Sopenharmony_ci	struct tc_tbf_qopt_offload qopt;
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
16762306a36Sopenharmony_ci		return;
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	qopt.command = TC_TBF_DESTROY;
17062306a36Sopenharmony_ci	qopt.handle = sch->handle;
17162306a36Sopenharmony_ci	qopt.parent = sch->parent;
17262306a36Sopenharmony_ci	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
17362306a36Sopenharmony_ci}
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_cistatic int tbf_offload_dump(struct Qdisc *sch)
17662306a36Sopenharmony_ci{
17762306a36Sopenharmony_ci	struct tc_tbf_qopt_offload qopt;
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	qopt.command = TC_TBF_STATS;
18062306a36Sopenharmony_ci	qopt.handle = sch->handle;
18162306a36Sopenharmony_ci	qopt.parent = sch->parent;
18262306a36Sopenharmony_ci	qopt.stats.bstats = &sch->bstats;
18362306a36Sopenharmony_ci	qopt.stats.qstats = &sch->qstats;
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
18662306a36Sopenharmony_ci}
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_cistatic void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new,
18962306a36Sopenharmony_ci			      struct Qdisc *old, struct netlink_ext_ack *extack)
19062306a36Sopenharmony_ci{
19162306a36Sopenharmony_ci	struct tc_tbf_qopt_offload graft_offload = {
19262306a36Sopenharmony_ci		.handle		= sch->handle,
19362306a36Sopenharmony_ci		.parent		= sch->parent,
19462306a36Sopenharmony_ci		.child_handle	= new->handle,
19562306a36Sopenharmony_ci		.command	= TC_TBF_GRAFT,
19662306a36Sopenharmony_ci	};
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
19962306a36Sopenharmony_ci				   TC_SETUP_QDISC_TBF, &graft_offload, extack);
20062306a36Sopenharmony_ci}
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci/* GSO packet is too big, segment it so that tbf can transmit
20362306a36Sopenharmony_ci * each segment in time
20462306a36Sopenharmony_ci */
20562306a36Sopenharmony_cistatic int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
20662306a36Sopenharmony_ci		       struct sk_buff **to_free)
20762306a36Sopenharmony_ci{
20862306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
20962306a36Sopenharmony_ci	struct sk_buff *segs, *nskb;
21062306a36Sopenharmony_ci	netdev_features_t features = netif_skb_features(skb);
21162306a36Sopenharmony_ci	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
21262306a36Sopenharmony_ci	int ret, nb;
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	if (IS_ERR_OR_NULL(segs))
21762306a36Sopenharmony_ci		return qdisc_drop(skb, sch, to_free);
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	nb = 0;
22062306a36Sopenharmony_ci	skb_list_walk_safe(segs, segs, nskb) {
22162306a36Sopenharmony_ci		skb_mark_not_on_list(segs);
22262306a36Sopenharmony_ci		qdisc_skb_cb(segs)->pkt_len = segs->len;
22362306a36Sopenharmony_ci		len += segs->len;
22462306a36Sopenharmony_ci		ret = qdisc_enqueue(segs, q->qdisc, to_free);
22562306a36Sopenharmony_ci		if (ret != NET_XMIT_SUCCESS) {
22662306a36Sopenharmony_ci			if (net_xmit_drop_count(ret))
22762306a36Sopenharmony_ci				qdisc_qstats_drop(sch);
22862306a36Sopenharmony_ci		} else {
22962306a36Sopenharmony_ci			nb++;
23062306a36Sopenharmony_ci		}
23162306a36Sopenharmony_ci	}
23262306a36Sopenharmony_ci	sch->q.qlen += nb;
23362306a36Sopenharmony_ci	if (nb > 1)
23462306a36Sopenharmony_ci		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
23562306a36Sopenharmony_ci	consume_skb(skb);
23662306a36Sopenharmony_ci	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
23762306a36Sopenharmony_ci}
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_cistatic int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
24062306a36Sopenharmony_ci		       struct sk_buff **to_free)
24162306a36Sopenharmony_ci{
24262306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
24362306a36Sopenharmony_ci	unsigned int len = qdisc_pkt_len(skb);
24462306a36Sopenharmony_ci	int ret;
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci	if (qdisc_pkt_len(skb) > q->max_size) {
24762306a36Sopenharmony_ci		if (skb_is_gso(skb) &&
24862306a36Sopenharmony_ci		    skb_gso_validate_mac_len(skb, q->max_size))
24962306a36Sopenharmony_ci			return tbf_segment(skb, sch, to_free);
25062306a36Sopenharmony_ci		return qdisc_drop(skb, sch, to_free);
25162306a36Sopenharmony_ci	}
25262306a36Sopenharmony_ci	ret = qdisc_enqueue(skb, q->qdisc, to_free);
25362306a36Sopenharmony_ci	if (ret != NET_XMIT_SUCCESS) {
25462306a36Sopenharmony_ci		if (net_xmit_drop_count(ret))
25562306a36Sopenharmony_ci			qdisc_qstats_drop(sch);
25662306a36Sopenharmony_ci		return ret;
25762306a36Sopenharmony_ci	}
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	sch->qstats.backlog += len;
26062306a36Sopenharmony_ci	sch->q.qlen++;
26162306a36Sopenharmony_ci	return NET_XMIT_SUCCESS;
26262306a36Sopenharmony_ci}
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_cistatic bool tbf_peak_present(const struct tbf_sched_data *q)
26562306a36Sopenharmony_ci{
26662306a36Sopenharmony_ci	return q->peak.rate_bytes_ps;
26762306a36Sopenharmony_ci}
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_cistatic struct sk_buff *tbf_dequeue(struct Qdisc *sch)
27062306a36Sopenharmony_ci{
27162306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
27262306a36Sopenharmony_ci	struct sk_buff *skb;
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	skb = q->qdisc->ops->peek(q->qdisc);
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci	if (skb) {
27762306a36Sopenharmony_ci		s64 now;
27862306a36Sopenharmony_ci		s64 toks;
27962306a36Sopenharmony_ci		s64 ptoks = 0;
28062306a36Sopenharmony_ci		unsigned int len = qdisc_pkt_len(skb);
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci		now = ktime_get_ns();
28362306a36Sopenharmony_ci		toks = min_t(s64, now - q->t_c, q->buffer);
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci		if (tbf_peak_present(q)) {
28662306a36Sopenharmony_ci			ptoks = toks + q->ptokens;
28762306a36Sopenharmony_ci			if (ptoks > q->mtu)
28862306a36Sopenharmony_ci				ptoks = q->mtu;
28962306a36Sopenharmony_ci			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
29062306a36Sopenharmony_ci		}
29162306a36Sopenharmony_ci		toks += q->tokens;
29262306a36Sopenharmony_ci		if (toks > q->buffer)
29362306a36Sopenharmony_ci			toks = q->buffer;
29462306a36Sopenharmony_ci		toks -= (s64) psched_l2t_ns(&q->rate, len);
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci		if ((toks|ptoks) >= 0) {
29762306a36Sopenharmony_ci			skb = qdisc_dequeue_peeked(q->qdisc);
29862306a36Sopenharmony_ci			if (unlikely(!skb))
29962306a36Sopenharmony_ci				return NULL;
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci			q->t_c = now;
30262306a36Sopenharmony_ci			q->tokens = toks;
30362306a36Sopenharmony_ci			q->ptokens = ptoks;
30462306a36Sopenharmony_ci			qdisc_qstats_backlog_dec(sch, skb);
30562306a36Sopenharmony_ci			sch->q.qlen--;
30662306a36Sopenharmony_ci			qdisc_bstats_update(sch, skb);
30762306a36Sopenharmony_ci			return skb;
30862306a36Sopenharmony_ci		}
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci		qdisc_watchdog_schedule_ns(&q->watchdog,
31162306a36Sopenharmony_ci					   now + max_t(long, -toks, -ptoks));
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci		/* Maybe we have a shorter packet in the queue,
31462306a36Sopenharmony_ci		   which can be sent now. It sounds cool,
31562306a36Sopenharmony_ci		   but, however, this is wrong in principle.
31662306a36Sopenharmony_ci		   We MUST NOT reorder packets under these circumstances.
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci		   Really, if we split the flow into independent
31962306a36Sopenharmony_ci		   subflows, it would be a very good solution.
32062306a36Sopenharmony_ci		   This is the main idea of all FQ algorithms
32162306a36Sopenharmony_ci		   (cf. CSZ, HPFQ, HFSC)
32262306a36Sopenharmony_ci		 */
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci		qdisc_qstats_overlimit(sch);
32562306a36Sopenharmony_ci	}
32662306a36Sopenharmony_ci	return NULL;
32762306a36Sopenharmony_ci}
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_cistatic void tbf_reset(struct Qdisc *sch)
33062306a36Sopenharmony_ci{
33162306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	qdisc_reset(q->qdisc);
33462306a36Sopenharmony_ci	q->t_c = ktime_get_ns();
33562306a36Sopenharmony_ci	q->tokens = q->buffer;
33662306a36Sopenharmony_ci	q->ptokens = q->mtu;
33762306a36Sopenharmony_ci	qdisc_watchdog_cancel(&q->watchdog);
33862306a36Sopenharmony_ci}
33962306a36Sopenharmony_ci
/* Netlink attribute policy used by tbf_change() to validate the nested
 * TBF options: the mandatory parameter block, the optional rate tables,
 * the 64-bit rate overrides and the burst sizes given in bytes.
 */
static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_RATE64]	= { .type = NLA_U64 },
	[TCA_TBF_PRATE64]	= { .type = NLA_U64 },
	[TCA_TBF_BURST] = { .type = NLA_U32 },
	[TCA_TBF_PBURST] = { .type = NLA_U32 },
};
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_cistatic int tbf_change(struct Qdisc *sch, struct nlattr *opt,
35162306a36Sopenharmony_ci		      struct netlink_ext_ack *extack)
35262306a36Sopenharmony_ci{
35362306a36Sopenharmony_ci	int err;
35462306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
35562306a36Sopenharmony_ci	struct nlattr *tb[TCA_TBF_MAX + 1];
35662306a36Sopenharmony_ci	struct tc_tbf_qopt *qopt;
35762306a36Sopenharmony_ci	struct Qdisc *child = NULL;
35862306a36Sopenharmony_ci	struct Qdisc *old = NULL;
35962306a36Sopenharmony_ci	struct psched_ratecfg rate;
36062306a36Sopenharmony_ci	struct psched_ratecfg peak;
36162306a36Sopenharmony_ci	u64 max_size;
36262306a36Sopenharmony_ci	s64 buffer, mtu;
36362306a36Sopenharmony_ci	u64 rate64 = 0, prate64 = 0;
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci	err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
36662306a36Sopenharmony_ci					  NULL);
36762306a36Sopenharmony_ci	if (err < 0)
36862306a36Sopenharmony_ci		return err;
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	err = -EINVAL;
37162306a36Sopenharmony_ci	if (tb[TCA_TBF_PARMS] == NULL)
37262306a36Sopenharmony_ci		goto done;
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci	qopt = nla_data(tb[TCA_TBF_PARMS]);
37562306a36Sopenharmony_ci	if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
37662306a36Sopenharmony_ci		qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
37762306a36Sopenharmony_ci					      tb[TCA_TBF_RTAB],
37862306a36Sopenharmony_ci					      NULL));
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
38162306a36Sopenharmony_ci			qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
38262306a36Sopenharmony_ci						      tb[TCA_TBF_PTAB],
38362306a36Sopenharmony_ci						      NULL));
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci	buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
38662306a36Sopenharmony_ci	mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	if (tb[TCA_TBF_RATE64])
38962306a36Sopenharmony_ci		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
39062306a36Sopenharmony_ci	psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	if (tb[TCA_TBF_BURST]) {
39362306a36Sopenharmony_ci		max_size = nla_get_u32(tb[TCA_TBF_BURST]);
39462306a36Sopenharmony_ci		buffer = psched_l2t_ns(&rate, max_size);
39562306a36Sopenharmony_ci	} else {
39662306a36Sopenharmony_ci		max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
39762306a36Sopenharmony_ci	}
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_ci	if (qopt->peakrate.rate) {
40062306a36Sopenharmony_ci		if (tb[TCA_TBF_PRATE64])
40162306a36Sopenharmony_ci			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
40262306a36Sopenharmony_ci		psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
40362306a36Sopenharmony_ci		if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
40462306a36Sopenharmony_ci			pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
40562306a36Sopenharmony_ci					peak.rate_bytes_ps, rate.rate_bytes_ps);
40662306a36Sopenharmony_ci			err = -EINVAL;
40762306a36Sopenharmony_ci			goto done;
40862306a36Sopenharmony_ci		}
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_ci		if (tb[TCA_TBF_PBURST]) {
41162306a36Sopenharmony_ci			u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
41262306a36Sopenharmony_ci			max_size = min_t(u32, max_size, pburst);
41362306a36Sopenharmony_ci			mtu = psched_l2t_ns(&peak, pburst);
41462306a36Sopenharmony_ci		} else {
41562306a36Sopenharmony_ci			max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
41662306a36Sopenharmony_ci		}
41762306a36Sopenharmony_ci	} else {
41862306a36Sopenharmony_ci		memset(&peak, 0, sizeof(peak));
41962306a36Sopenharmony_ci	}
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci	if (max_size < psched_mtu(qdisc_dev(sch)))
42262306a36Sopenharmony_ci		pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
42362306a36Sopenharmony_ci				    max_size, qdisc_dev(sch)->name,
42462306a36Sopenharmony_ci				    psched_mtu(qdisc_dev(sch)));
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	if (!max_size) {
42762306a36Sopenharmony_ci		err = -EINVAL;
42862306a36Sopenharmony_ci		goto done;
42962306a36Sopenharmony_ci	}
43062306a36Sopenharmony_ci
43162306a36Sopenharmony_ci	if (q->qdisc != &noop_qdisc) {
43262306a36Sopenharmony_ci		err = fifo_set_limit(q->qdisc, qopt->limit);
43362306a36Sopenharmony_ci		if (err)
43462306a36Sopenharmony_ci			goto done;
43562306a36Sopenharmony_ci	} else if (qopt->limit > 0) {
43662306a36Sopenharmony_ci		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
43762306a36Sopenharmony_ci					 extack);
43862306a36Sopenharmony_ci		if (IS_ERR(child)) {
43962306a36Sopenharmony_ci			err = PTR_ERR(child);
44062306a36Sopenharmony_ci			goto done;
44162306a36Sopenharmony_ci		}
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci		/* child is fifo, no need to check for noop_qdisc */
44462306a36Sopenharmony_ci		qdisc_hash_add(child, true);
44562306a36Sopenharmony_ci	}
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci	sch_tree_lock(sch);
44862306a36Sopenharmony_ci	if (child) {
44962306a36Sopenharmony_ci		qdisc_tree_flush_backlog(q->qdisc);
45062306a36Sopenharmony_ci		old = q->qdisc;
45162306a36Sopenharmony_ci		q->qdisc = child;
45262306a36Sopenharmony_ci	}
45362306a36Sopenharmony_ci	q->limit = qopt->limit;
45462306a36Sopenharmony_ci	if (tb[TCA_TBF_PBURST])
45562306a36Sopenharmony_ci		q->mtu = mtu;
45662306a36Sopenharmony_ci	else
45762306a36Sopenharmony_ci		q->mtu = PSCHED_TICKS2NS(qopt->mtu);
45862306a36Sopenharmony_ci	q->max_size = max_size;
45962306a36Sopenharmony_ci	if (tb[TCA_TBF_BURST])
46062306a36Sopenharmony_ci		q->buffer = buffer;
46162306a36Sopenharmony_ci	else
46262306a36Sopenharmony_ci		q->buffer = PSCHED_TICKS2NS(qopt->buffer);
46362306a36Sopenharmony_ci	q->tokens = q->buffer;
46462306a36Sopenharmony_ci	q->ptokens = q->mtu;
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci	memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
46762306a36Sopenharmony_ci	memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci	sch_tree_unlock(sch);
47062306a36Sopenharmony_ci	qdisc_put(old);
47162306a36Sopenharmony_ci	err = 0;
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci	tbf_offload_change(sch);
47462306a36Sopenharmony_cidone:
47562306a36Sopenharmony_ci	return err;
47662306a36Sopenharmony_ci}
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_cistatic int tbf_init(struct Qdisc *sch, struct nlattr *opt,
47962306a36Sopenharmony_ci		    struct netlink_ext_ack *extack)
48062306a36Sopenharmony_ci{
48162306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci	qdisc_watchdog_init(&q->watchdog, sch);
48462306a36Sopenharmony_ci	q->qdisc = &noop_qdisc;
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci	if (!opt)
48762306a36Sopenharmony_ci		return -EINVAL;
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci	q->t_c = ktime_get_ns();
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci	return tbf_change(sch, opt, extack);
49262306a36Sopenharmony_ci}
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_cistatic void tbf_destroy(struct Qdisc *sch)
49562306a36Sopenharmony_ci{
49662306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci	qdisc_watchdog_cancel(&q->watchdog);
49962306a36Sopenharmony_ci	tbf_offload_destroy(sch);
50062306a36Sopenharmony_ci	qdisc_put(q->qdisc);
50162306a36Sopenharmony_ci}
50262306a36Sopenharmony_ci
50362306a36Sopenharmony_cistatic int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
50462306a36Sopenharmony_ci{
50562306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
50662306a36Sopenharmony_ci	struct nlattr *nest;
50762306a36Sopenharmony_ci	struct tc_tbf_qopt opt;
50862306a36Sopenharmony_ci	int err;
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_ci	err = tbf_offload_dump(sch);
51162306a36Sopenharmony_ci	if (err)
51262306a36Sopenharmony_ci		return err;
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
51562306a36Sopenharmony_ci	if (nest == NULL)
51662306a36Sopenharmony_ci		goto nla_put_failure;
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci	opt.limit = q->limit;
51962306a36Sopenharmony_ci	psched_ratecfg_getrate(&opt.rate, &q->rate);
52062306a36Sopenharmony_ci	if (tbf_peak_present(q))
52162306a36Sopenharmony_ci		psched_ratecfg_getrate(&opt.peakrate, &q->peak);
52262306a36Sopenharmony_ci	else
52362306a36Sopenharmony_ci		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
52462306a36Sopenharmony_ci	opt.mtu = PSCHED_NS2TICKS(q->mtu);
52562306a36Sopenharmony_ci	opt.buffer = PSCHED_NS2TICKS(q->buffer);
52662306a36Sopenharmony_ci	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
52762306a36Sopenharmony_ci		goto nla_put_failure;
52862306a36Sopenharmony_ci	if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
52962306a36Sopenharmony_ci	    nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
53062306a36Sopenharmony_ci			      TCA_TBF_PAD))
53162306a36Sopenharmony_ci		goto nla_put_failure;
53262306a36Sopenharmony_ci	if (tbf_peak_present(q) &&
53362306a36Sopenharmony_ci	    q->peak.rate_bytes_ps >= (1ULL << 32) &&
53462306a36Sopenharmony_ci	    nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
53562306a36Sopenharmony_ci			      TCA_TBF_PAD))
53662306a36Sopenharmony_ci		goto nla_put_failure;
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	return nla_nest_end(skb, nest);
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_cinla_put_failure:
54162306a36Sopenharmony_ci	nla_nest_cancel(skb, nest);
54262306a36Sopenharmony_ci	return -1;
54362306a36Sopenharmony_ci}
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_cistatic int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
54662306a36Sopenharmony_ci			  struct sk_buff *skb, struct tcmsg *tcm)
54762306a36Sopenharmony_ci{
54862306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	tcm->tcm_handle |= TC_H_MIN(1);
55162306a36Sopenharmony_ci	tcm->tcm_info = q->qdisc->handle;
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci	return 0;
55462306a36Sopenharmony_ci}
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_cistatic int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
55762306a36Sopenharmony_ci		     struct Qdisc **old, struct netlink_ext_ack *extack)
55862306a36Sopenharmony_ci{
55962306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	if (new == NULL)
56262306a36Sopenharmony_ci		new = &noop_qdisc;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	*old = qdisc_replace(sch, new, &q->qdisc);
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ci	tbf_offload_graft(sch, new, *old, extack);
56762306a36Sopenharmony_ci	return 0;
56862306a36Sopenharmony_ci}
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_cistatic struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
57162306a36Sopenharmony_ci{
57262306a36Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
57362306a36Sopenharmony_ci	return q->qdisc;
57462306a36Sopenharmony_ci}
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_cistatic unsigned long tbf_find(struct Qdisc *sch, u32 classid)
57762306a36Sopenharmony_ci{
57862306a36Sopenharmony_ci	return 1;
57962306a36Sopenharmony_ci}
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_cistatic void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
58262306a36Sopenharmony_ci{
58362306a36Sopenharmony_ci	if (!walker->stop) {
58462306a36Sopenharmony_ci		tc_qdisc_stats_dump(sch, 1, walker);
58562306a36Sopenharmony_ci	}
58662306a36Sopenharmony_ci}
58762306a36Sopenharmony_ci
/* Class operations: TBF exposes its inner qdisc as a single class. */
static const struct Qdisc_class_ops tbf_class_ops = {
	.graft		=	tbf_graft,
	.leaf		=	tbf_leaf,
	.find		=	tbf_find,
	.walk		=	tbf_walk,
	.dump		=	tbf_dump_class,
};
59562306a36Sopenharmony_ci
/* Qdisc operations table registered under the id "tbf" by
 * tbf_module_init().
 */
static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&tbf_class_ops,
	.id		=	"tbf",
	.priv_size	=	sizeof(struct tbf_sched_data),
	.enqueue	=	tbf_enqueue,
	.dequeue	=	tbf_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	tbf_init,
	.reset		=	tbf_reset,
	.destroy	=	tbf_destroy,
	.change		=	tbf_change,
	.dump		=	tbf_dump,
	.owner		=	THIS_MODULE,
};
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_cistatic int __init tbf_module_init(void)
61362306a36Sopenharmony_ci{
61462306a36Sopenharmony_ci	return register_qdisc(&tbf_qdisc_ops);
61562306a36Sopenharmony_ci}
61662306a36Sopenharmony_ci
/* Module exit point: unregister the "tbf" qdisc operations. */
static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}
62162306a36Sopenharmony_cimodule_init(tbf_module_init)
62262306a36Sopenharmony_cimodule_exit(tbf_module_exit)
62362306a36Sopenharmony_ciMODULE_LICENSE("GPL");
624