xref: /kernel/linux/linux-5.10/net/sched/sch_tbf.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * net/sched/sch_tbf.c	Token Bucket Filter queue.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
68c2ecf20Sopenharmony_ci *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
78c2ecf20Sopenharmony_ci *						 original idea by Martin Devera
88c2ecf20Sopenharmony_ci */
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci#include <linux/module.h>
118c2ecf20Sopenharmony_ci#include <linux/types.h>
128c2ecf20Sopenharmony_ci#include <linux/kernel.h>
138c2ecf20Sopenharmony_ci#include <linux/string.h>
148c2ecf20Sopenharmony_ci#include <linux/errno.h>
158c2ecf20Sopenharmony_ci#include <linux/skbuff.h>
168c2ecf20Sopenharmony_ci#include <net/netlink.h>
178c2ecf20Sopenharmony_ci#include <net/sch_generic.h>
188c2ecf20Sopenharmony_ci#include <net/pkt_cls.h>
198c2ecf20Sopenharmony_ci#include <net/pkt_sched.h>
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci
228c2ecf20Sopenharmony_ci/*	Simple Token Bucket Filter.
238c2ecf20Sopenharmony_ci	=======================================
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci	SOURCE.
268c2ecf20Sopenharmony_ci	-------
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci	None.
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci	Description.
318c2ecf20Sopenharmony_ci	------------
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci	A data flow obeys TBF with rate R and depth B, if for any
348c2ecf20Sopenharmony_ci	time interval t_i...t_f the number of transmitted bits
358c2ecf20Sopenharmony_ci	does not exceed B + R*(t_f-t_i).
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci	Packetized version of this definition:
388c2ecf20Sopenharmony_ci	The sequence of packets of sizes s_i served at moments t_i
398c2ecf20Sopenharmony_ci	obeys TBF, if for any i<=k:
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci	s_i+....+s_k <= B + R*(t_k - t_i)
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_ci	Algorithm.
448c2ecf20Sopenharmony_ci	----------
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci	N(t+delta) = min{B/R, N(t) + delta}
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	If the first packet in queue has length S, it may be
518c2ecf20Sopenharmony_ci	transmitted only at the time t_* when S/R <= N(t_*),
528c2ecf20Sopenharmony_ci	and in this case N(t) jumps:
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci	N(t_* + 0) = N(t_* - 0) - S/R.
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci	Actually, QoS requires two TBF to be applied to a data stream.
598c2ecf20Sopenharmony_ci	One of them controls steady state burst size, another
608c2ecf20Sopenharmony_ci	one with rate P (peak rate) and depth M (equal to link MTU)
618c2ecf20Sopenharmony_ci	limits bursts at a smaller time scale.
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci	It is easy to see that P>R, and B>M. If P is infinity, this double
648c2ecf20Sopenharmony_ci	TBF is equivalent to a single one.
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	When TBF works in reshaping mode, latency is estimated as:
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	lat = max ((L-B)/R, (L-M)/P)
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	NOTES.
728c2ecf20Sopenharmony_ci	------
738c2ecf20Sopenharmony_ci
	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awakened by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	This means, that with depth B, the maximal rate is
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci	R_crit = B*HZ
858c2ecf20Sopenharmony_ci
	E.g. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much tougher: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci	With classful TBF, limit is just kept for backwards compatibility.
938c2ecf20Sopenharmony_ci	It is passed to the default bfifo qdisc - if the inner qdisc is
948c2ecf20Sopenharmony_ci	changed the limit is not effective anymore.
958c2ecf20Sopenharmony_ci*/
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_cistruct tbf_sched_data {
988c2ecf20Sopenharmony_ci/* Parameters */
998c2ecf20Sopenharmony_ci	u32		limit;		/* Maximal length of backlog: bytes */
1008c2ecf20Sopenharmony_ci	u32		max_size;
1018c2ecf20Sopenharmony_ci	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
1028c2ecf20Sopenharmony_ci	s64		mtu;
1038c2ecf20Sopenharmony_ci	struct psched_ratecfg rate;
1048c2ecf20Sopenharmony_ci	struct psched_ratecfg peak;
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci/* Variables */
1078c2ecf20Sopenharmony_ci	s64	tokens;			/* Current number of B tokens */
1088c2ecf20Sopenharmony_ci	s64	ptokens;		/* Current number of P tokens */
1098c2ecf20Sopenharmony_ci	s64	t_c;			/* Time check-point */
1108c2ecf20Sopenharmony_ci	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
1118c2ecf20Sopenharmony_ci	struct qdisc_watchdog watchdog;	/* Watchdog timer */
1128c2ecf20Sopenharmony_ci};
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci/* Time to Length, convert time in ns to length in bytes
1168c2ecf20Sopenharmony_ci * to determinate how many bytes can be sent in given time.
1178c2ecf20Sopenharmony_ci */
1188c2ecf20Sopenharmony_cistatic u64 psched_ns_t2l(const struct psched_ratecfg *r,
1198c2ecf20Sopenharmony_ci			 u64 time_in_ns)
1208c2ecf20Sopenharmony_ci{
1218c2ecf20Sopenharmony_ci	/* The formula is :
1228c2ecf20Sopenharmony_ci	 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
1238c2ecf20Sopenharmony_ci	 */
1248c2ecf20Sopenharmony_ci	u64 len = time_in_ns * r->rate_bytes_ps;
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci	do_div(len, NSEC_PER_SEC);
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
1298c2ecf20Sopenharmony_ci		do_div(len, 53);
1308c2ecf20Sopenharmony_ci		len = len * 48;
1318c2ecf20Sopenharmony_ci	}
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci	if (len > r->overhead)
1348c2ecf20Sopenharmony_ci		len -= r->overhead;
1358c2ecf20Sopenharmony_ci	else
1368c2ecf20Sopenharmony_ci		len = 0;
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	return len;
1398c2ecf20Sopenharmony_ci}
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_cistatic void tbf_offload_change(struct Qdisc *sch)
1428c2ecf20Sopenharmony_ci{
1438c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
1448c2ecf20Sopenharmony_ci	struct net_device *dev = qdisc_dev(sch);
1458c2ecf20Sopenharmony_ci	struct tc_tbf_qopt_offload qopt;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1488c2ecf20Sopenharmony_ci		return;
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	qopt.command = TC_TBF_REPLACE;
1518c2ecf20Sopenharmony_ci	qopt.handle = sch->handle;
1528c2ecf20Sopenharmony_ci	qopt.parent = sch->parent;
1538c2ecf20Sopenharmony_ci	qopt.replace_params.rate = q->rate;
1548c2ecf20Sopenharmony_ci	qopt.replace_params.max_size = q->max_size;
1558c2ecf20Sopenharmony_ci	qopt.replace_params.qstats = &sch->qstats;
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
1588c2ecf20Sopenharmony_ci}
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_cistatic void tbf_offload_destroy(struct Qdisc *sch)
1618c2ecf20Sopenharmony_ci{
1628c2ecf20Sopenharmony_ci	struct net_device *dev = qdisc_dev(sch);
1638c2ecf20Sopenharmony_ci	struct tc_tbf_qopt_offload qopt;
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1668c2ecf20Sopenharmony_ci		return;
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	qopt.command = TC_TBF_DESTROY;
1698c2ecf20Sopenharmony_ci	qopt.handle = sch->handle;
1708c2ecf20Sopenharmony_ci	qopt.parent = sch->parent;
1718c2ecf20Sopenharmony_ci	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
1728c2ecf20Sopenharmony_ci}
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_cistatic int tbf_offload_dump(struct Qdisc *sch)
1758c2ecf20Sopenharmony_ci{
1768c2ecf20Sopenharmony_ci	struct tc_tbf_qopt_offload qopt;
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	qopt.command = TC_TBF_STATS;
1798c2ecf20Sopenharmony_ci	qopt.handle = sch->handle;
1808c2ecf20Sopenharmony_ci	qopt.parent = sch->parent;
1818c2ecf20Sopenharmony_ci	qopt.stats.bstats = &sch->bstats;
1828c2ecf20Sopenharmony_ci	qopt.stats.qstats = &sch->qstats;
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
1858c2ecf20Sopenharmony_ci}
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci/* GSO packet is too big, segment it so that tbf can transmit
1888c2ecf20Sopenharmony_ci * each segment in time
1898c2ecf20Sopenharmony_ci */
1908c2ecf20Sopenharmony_cistatic int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
1918c2ecf20Sopenharmony_ci		       struct sk_buff **to_free)
1928c2ecf20Sopenharmony_ci{
1938c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
1948c2ecf20Sopenharmony_ci	struct sk_buff *segs, *nskb;
1958c2ecf20Sopenharmony_ci	netdev_features_t features = netif_skb_features(skb);
1968c2ecf20Sopenharmony_ci	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
1978c2ecf20Sopenharmony_ci	int ret, nb;
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	if (IS_ERR_OR_NULL(segs))
2028c2ecf20Sopenharmony_ci		return qdisc_drop(skb, sch, to_free);
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	nb = 0;
2058c2ecf20Sopenharmony_ci	skb_list_walk_safe(segs, segs, nskb) {
2068c2ecf20Sopenharmony_ci		skb_mark_not_on_list(segs);
2078c2ecf20Sopenharmony_ci		qdisc_skb_cb(segs)->pkt_len = segs->len;
2088c2ecf20Sopenharmony_ci		len += segs->len;
2098c2ecf20Sopenharmony_ci		ret = qdisc_enqueue(segs, q->qdisc, to_free);
2108c2ecf20Sopenharmony_ci		if (ret != NET_XMIT_SUCCESS) {
2118c2ecf20Sopenharmony_ci			if (net_xmit_drop_count(ret))
2128c2ecf20Sopenharmony_ci				qdisc_qstats_drop(sch);
2138c2ecf20Sopenharmony_ci		} else {
2148c2ecf20Sopenharmony_ci			nb++;
2158c2ecf20Sopenharmony_ci		}
2168c2ecf20Sopenharmony_ci	}
2178c2ecf20Sopenharmony_ci	sch->q.qlen += nb;
2188c2ecf20Sopenharmony_ci	if (nb > 1)
2198c2ecf20Sopenharmony_ci		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
2208c2ecf20Sopenharmony_ci	consume_skb(skb);
2218c2ecf20Sopenharmony_ci	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
2228c2ecf20Sopenharmony_ci}
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_cistatic int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
2258c2ecf20Sopenharmony_ci		       struct sk_buff **to_free)
2268c2ecf20Sopenharmony_ci{
2278c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
2288c2ecf20Sopenharmony_ci	unsigned int len = qdisc_pkt_len(skb);
2298c2ecf20Sopenharmony_ci	int ret;
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci	if (qdisc_pkt_len(skb) > q->max_size) {
2328c2ecf20Sopenharmony_ci		if (skb_is_gso(skb) &&
2338c2ecf20Sopenharmony_ci		    skb_gso_validate_mac_len(skb, q->max_size))
2348c2ecf20Sopenharmony_ci			return tbf_segment(skb, sch, to_free);
2358c2ecf20Sopenharmony_ci		return qdisc_drop(skb, sch, to_free);
2368c2ecf20Sopenharmony_ci	}
2378c2ecf20Sopenharmony_ci	ret = qdisc_enqueue(skb, q->qdisc, to_free);
2388c2ecf20Sopenharmony_ci	if (ret != NET_XMIT_SUCCESS) {
2398c2ecf20Sopenharmony_ci		if (net_xmit_drop_count(ret))
2408c2ecf20Sopenharmony_ci			qdisc_qstats_drop(sch);
2418c2ecf20Sopenharmony_ci		return ret;
2428c2ecf20Sopenharmony_ci	}
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci	sch->qstats.backlog += len;
2458c2ecf20Sopenharmony_ci	sch->q.qlen++;
2468c2ecf20Sopenharmony_ci	return NET_XMIT_SUCCESS;
2478c2ecf20Sopenharmony_ci}
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_cistatic bool tbf_peak_present(const struct tbf_sched_data *q)
2508c2ecf20Sopenharmony_ci{
2518c2ecf20Sopenharmony_ci	return q->peak.rate_bytes_ps;
2528c2ecf20Sopenharmony_ci}
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_cistatic struct sk_buff *tbf_dequeue(struct Qdisc *sch)
2558c2ecf20Sopenharmony_ci{
2568c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
2578c2ecf20Sopenharmony_ci	struct sk_buff *skb;
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	skb = q->qdisc->ops->peek(q->qdisc);
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	if (skb) {
2628c2ecf20Sopenharmony_ci		s64 now;
2638c2ecf20Sopenharmony_ci		s64 toks;
2648c2ecf20Sopenharmony_ci		s64 ptoks = 0;
2658c2ecf20Sopenharmony_ci		unsigned int len = qdisc_pkt_len(skb);
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci		now = ktime_get_ns();
2688c2ecf20Sopenharmony_ci		toks = min_t(s64, now - q->t_c, q->buffer);
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ci		if (tbf_peak_present(q)) {
2718c2ecf20Sopenharmony_ci			ptoks = toks + q->ptokens;
2728c2ecf20Sopenharmony_ci			if (ptoks > q->mtu)
2738c2ecf20Sopenharmony_ci				ptoks = q->mtu;
2748c2ecf20Sopenharmony_ci			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
2758c2ecf20Sopenharmony_ci		}
2768c2ecf20Sopenharmony_ci		toks += q->tokens;
2778c2ecf20Sopenharmony_ci		if (toks > q->buffer)
2788c2ecf20Sopenharmony_ci			toks = q->buffer;
2798c2ecf20Sopenharmony_ci		toks -= (s64) psched_l2t_ns(&q->rate, len);
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci		if ((toks|ptoks) >= 0) {
2828c2ecf20Sopenharmony_ci			skb = qdisc_dequeue_peeked(q->qdisc);
2838c2ecf20Sopenharmony_ci			if (unlikely(!skb))
2848c2ecf20Sopenharmony_ci				return NULL;
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci			q->t_c = now;
2878c2ecf20Sopenharmony_ci			q->tokens = toks;
2888c2ecf20Sopenharmony_ci			q->ptokens = ptoks;
2898c2ecf20Sopenharmony_ci			qdisc_qstats_backlog_dec(sch, skb);
2908c2ecf20Sopenharmony_ci			sch->q.qlen--;
2918c2ecf20Sopenharmony_ci			qdisc_bstats_update(sch, skb);
2928c2ecf20Sopenharmony_ci			return skb;
2938c2ecf20Sopenharmony_ci		}
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci		qdisc_watchdog_schedule_ns(&q->watchdog,
2968c2ecf20Sopenharmony_ci					   now + max_t(long, -toks, -ptoks));
2978c2ecf20Sopenharmony_ci
2988c2ecf20Sopenharmony_ci		/* Maybe we have a shorter packet in the queue,
2998c2ecf20Sopenharmony_ci		   which can be sent now. It sounds cool,
3008c2ecf20Sopenharmony_ci		   but, however, this is wrong in principle.
3018c2ecf20Sopenharmony_ci		   We MUST NOT reorder packets under these circumstances.
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci		   Really, if we split the flow into independent
3048c2ecf20Sopenharmony_ci		   subflows, it would be a very good solution.
3058c2ecf20Sopenharmony_ci		   This is the main idea of all FQ algorithms
3068c2ecf20Sopenharmony_ci		   (cf. CSZ, HPFQ, HFSC)
3078c2ecf20Sopenharmony_ci		 */
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci		qdisc_qstats_overlimit(sch);
3108c2ecf20Sopenharmony_ci	}
3118c2ecf20Sopenharmony_ci	return NULL;
3128c2ecf20Sopenharmony_ci}
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_cistatic void tbf_reset(struct Qdisc *sch)
3158c2ecf20Sopenharmony_ci{
3168c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci	qdisc_reset(q->qdisc);
3198c2ecf20Sopenharmony_ci	q->t_c = ktime_get_ns();
3208c2ecf20Sopenharmony_ci	q->tokens = q->buffer;
3218c2ecf20Sopenharmony_ci	q->ptokens = q->mtu;
3228c2ecf20Sopenharmony_ci	qdisc_watchdog_cancel(&q->watchdog);
3238c2ecf20Sopenharmony_ci}
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_cistatic const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
3268c2ecf20Sopenharmony_ci	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
3278c2ecf20Sopenharmony_ci	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
3288c2ecf20Sopenharmony_ci	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
3298c2ecf20Sopenharmony_ci	[TCA_TBF_RATE64]	= { .type = NLA_U64 },
3308c2ecf20Sopenharmony_ci	[TCA_TBF_PRATE64]	= { .type = NLA_U64 },
3318c2ecf20Sopenharmony_ci	[TCA_TBF_BURST] = { .type = NLA_U32 },
3328c2ecf20Sopenharmony_ci	[TCA_TBF_PBURST] = { .type = NLA_U32 },
3338c2ecf20Sopenharmony_ci};
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_cistatic int tbf_change(struct Qdisc *sch, struct nlattr *opt,
3368c2ecf20Sopenharmony_ci		      struct netlink_ext_ack *extack)
3378c2ecf20Sopenharmony_ci{
3388c2ecf20Sopenharmony_ci	int err;
3398c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
3408c2ecf20Sopenharmony_ci	struct nlattr *tb[TCA_TBF_MAX + 1];
3418c2ecf20Sopenharmony_ci	struct tc_tbf_qopt *qopt;
3428c2ecf20Sopenharmony_ci	struct Qdisc *child = NULL;
3438c2ecf20Sopenharmony_ci	struct Qdisc *old = NULL;
3448c2ecf20Sopenharmony_ci	struct psched_ratecfg rate;
3458c2ecf20Sopenharmony_ci	struct psched_ratecfg peak;
3468c2ecf20Sopenharmony_ci	u64 max_size;
3478c2ecf20Sopenharmony_ci	s64 buffer, mtu;
3488c2ecf20Sopenharmony_ci	u64 rate64 = 0, prate64 = 0;
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
3518c2ecf20Sopenharmony_ci					  NULL);
3528c2ecf20Sopenharmony_ci	if (err < 0)
3538c2ecf20Sopenharmony_ci		return err;
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci	err = -EINVAL;
3568c2ecf20Sopenharmony_ci	if (tb[TCA_TBF_PARMS] == NULL)
3578c2ecf20Sopenharmony_ci		goto done;
3588c2ecf20Sopenharmony_ci
3598c2ecf20Sopenharmony_ci	qopt = nla_data(tb[TCA_TBF_PARMS]);
3608c2ecf20Sopenharmony_ci	if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
3618c2ecf20Sopenharmony_ci		qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
3628c2ecf20Sopenharmony_ci					      tb[TCA_TBF_RTAB],
3638c2ecf20Sopenharmony_ci					      NULL));
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_ci	if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
3668c2ecf20Sopenharmony_ci			qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
3678c2ecf20Sopenharmony_ci						      tb[TCA_TBF_PTAB],
3688c2ecf20Sopenharmony_ci						      NULL));
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci	buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
3718c2ecf20Sopenharmony_ci	mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci	if (tb[TCA_TBF_RATE64])
3748c2ecf20Sopenharmony_ci		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
3758c2ecf20Sopenharmony_ci	psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci	if (tb[TCA_TBF_BURST]) {
3788c2ecf20Sopenharmony_ci		max_size = nla_get_u32(tb[TCA_TBF_BURST]);
3798c2ecf20Sopenharmony_ci		buffer = psched_l2t_ns(&rate, max_size);
3808c2ecf20Sopenharmony_ci	} else {
3818c2ecf20Sopenharmony_ci		max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
3828c2ecf20Sopenharmony_ci	}
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci	if (qopt->peakrate.rate) {
3858c2ecf20Sopenharmony_ci		if (tb[TCA_TBF_PRATE64])
3868c2ecf20Sopenharmony_ci			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
3878c2ecf20Sopenharmony_ci		psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
3888c2ecf20Sopenharmony_ci		if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
3898c2ecf20Sopenharmony_ci			pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
3908c2ecf20Sopenharmony_ci					peak.rate_bytes_ps, rate.rate_bytes_ps);
3918c2ecf20Sopenharmony_ci			err = -EINVAL;
3928c2ecf20Sopenharmony_ci			goto done;
3938c2ecf20Sopenharmony_ci		}
3948c2ecf20Sopenharmony_ci
3958c2ecf20Sopenharmony_ci		if (tb[TCA_TBF_PBURST]) {
3968c2ecf20Sopenharmony_ci			u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
3978c2ecf20Sopenharmony_ci			max_size = min_t(u32, max_size, pburst);
3988c2ecf20Sopenharmony_ci			mtu = psched_l2t_ns(&peak, pburst);
3998c2ecf20Sopenharmony_ci		} else {
4008c2ecf20Sopenharmony_ci			max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
4018c2ecf20Sopenharmony_ci		}
4028c2ecf20Sopenharmony_ci	} else {
4038c2ecf20Sopenharmony_ci		memset(&peak, 0, sizeof(peak));
4048c2ecf20Sopenharmony_ci	}
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ci	if (max_size < psched_mtu(qdisc_dev(sch)))
4078c2ecf20Sopenharmony_ci		pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
4088c2ecf20Sopenharmony_ci				    max_size, qdisc_dev(sch)->name,
4098c2ecf20Sopenharmony_ci				    psched_mtu(qdisc_dev(sch)));
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ci	if (!max_size) {
4128c2ecf20Sopenharmony_ci		err = -EINVAL;
4138c2ecf20Sopenharmony_ci		goto done;
4148c2ecf20Sopenharmony_ci	}
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_ci	if (q->qdisc != &noop_qdisc) {
4178c2ecf20Sopenharmony_ci		err = fifo_set_limit(q->qdisc, qopt->limit);
4188c2ecf20Sopenharmony_ci		if (err)
4198c2ecf20Sopenharmony_ci			goto done;
4208c2ecf20Sopenharmony_ci	} else if (qopt->limit > 0) {
4218c2ecf20Sopenharmony_ci		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
4228c2ecf20Sopenharmony_ci					 extack);
4238c2ecf20Sopenharmony_ci		if (IS_ERR(child)) {
4248c2ecf20Sopenharmony_ci			err = PTR_ERR(child);
4258c2ecf20Sopenharmony_ci			goto done;
4268c2ecf20Sopenharmony_ci		}
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_ci		/* child is fifo, no need to check for noop_qdisc */
4298c2ecf20Sopenharmony_ci		qdisc_hash_add(child, true);
4308c2ecf20Sopenharmony_ci	}
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci	sch_tree_lock(sch);
4338c2ecf20Sopenharmony_ci	if (child) {
4348c2ecf20Sopenharmony_ci		qdisc_tree_flush_backlog(q->qdisc);
4358c2ecf20Sopenharmony_ci		old = q->qdisc;
4368c2ecf20Sopenharmony_ci		q->qdisc = child;
4378c2ecf20Sopenharmony_ci	}
4388c2ecf20Sopenharmony_ci	q->limit = qopt->limit;
4398c2ecf20Sopenharmony_ci	if (tb[TCA_TBF_PBURST])
4408c2ecf20Sopenharmony_ci		q->mtu = mtu;
4418c2ecf20Sopenharmony_ci	else
4428c2ecf20Sopenharmony_ci		q->mtu = PSCHED_TICKS2NS(qopt->mtu);
4438c2ecf20Sopenharmony_ci	q->max_size = max_size;
4448c2ecf20Sopenharmony_ci	if (tb[TCA_TBF_BURST])
4458c2ecf20Sopenharmony_ci		q->buffer = buffer;
4468c2ecf20Sopenharmony_ci	else
4478c2ecf20Sopenharmony_ci		q->buffer = PSCHED_TICKS2NS(qopt->buffer);
4488c2ecf20Sopenharmony_ci	q->tokens = q->buffer;
4498c2ecf20Sopenharmony_ci	q->ptokens = q->mtu;
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
4528c2ecf20Sopenharmony_ci	memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	sch_tree_unlock(sch);
4558c2ecf20Sopenharmony_ci	qdisc_put(old);
4568c2ecf20Sopenharmony_ci	err = 0;
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_ci	tbf_offload_change(sch);
4598c2ecf20Sopenharmony_cidone:
4608c2ecf20Sopenharmony_ci	return err;
4618c2ecf20Sopenharmony_ci}
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_cistatic int tbf_init(struct Qdisc *sch, struct nlattr *opt,
4648c2ecf20Sopenharmony_ci		    struct netlink_ext_ack *extack)
4658c2ecf20Sopenharmony_ci{
4668c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_ci	qdisc_watchdog_init(&q->watchdog, sch);
4698c2ecf20Sopenharmony_ci	q->qdisc = &noop_qdisc;
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci	if (!opt)
4728c2ecf20Sopenharmony_ci		return -EINVAL;
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	q->t_c = ktime_get_ns();
4758c2ecf20Sopenharmony_ci
4768c2ecf20Sopenharmony_ci	return tbf_change(sch, opt, extack);
4778c2ecf20Sopenharmony_ci}
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_cistatic void tbf_destroy(struct Qdisc *sch)
4808c2ecf20Sopenharmony_ci{
4818c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
4828c2ecf20Sopenharmony_ci
4838c2ecf20Sopenharmony_ci	qdisc_watchdog_cancel(&q->watchdog);
4848c2ecf20Sopenharmony_ci	tbf_offload_destroy(sch);
4858c2ecf20Sopenharmony_ci	qdisc_put(q->qdisc);
4868c2ecf20Sopenharmony_ci}
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_cistatic int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
4898c2ecf20Sopenharmony_ci{
4908c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
4918c2ecf20Sopenharmony_ci	struct nlattr *nest;
4928c2ecf20Sopenharmony_ci	struct tc_tbf_qopt opt;
4938c2ecf20Sopenharmony_ci	int err;
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci	err = tbf_offload_dump(sch);
4968c2ecf20Sopenharmony_ci	if (err)
4978c2ecf20Sopenharmony_ci		return err;
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_ci	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
5008c2ecf20Sopenharmony_ci	if (nest == NULL)
5018c2ecf20Sopenharmony_ci		goto nla_put_failure;
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_ci	opt.limit = q->limit;
5048c2ecf20Sopenharmony_ci	psched_ratecfg_getrate(&opt.rate, &q->rate);
5058c2ecf20Sopenharmony_ci	if (tbf_peak_present(q))
5068c2ecf20Sopenharmony_ci		psched_ratecfg_getrate(&opt.peakrate, &q->peak);
5078c2ecf20Sopenharmony_ci	else
5088c2ecf20Sopenharmony_ci		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
5098c2ecf20Sopenharmony_ci	opt.mtu = PSCHED_NS2TICKS(q->mtu);
5108c2ecf20Sopenharmony_ci	opt.buffer = PSCHED_NS2TICKS(q->buffer);
5118c2ecf20Sopenharmony_ci	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
5128c2ecf20Sopenharmony_ci		goto nla_put_failure;
5138c2ecf20Sopenharmony_ci	if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
5148c2ecf20Sopenharmony_ci	    nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
5158c2ecf20Sopenharmony_ci			      TCA_TBF_PAD))
5168c2ecf20Sopenharmony_ci		goto nla_put_failure;
5178c2ecf20Sopenharmony_ci	if (tbf_peak_present(q) &&
5188c2ecf20Sopenharmony_ci	    q->peak.rate_bytes_ps >= (1ULL << 32) &&
5198c2ecf20Sopenharmony_ci	    nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
5208c2ecf20Sopenharmony_ci			      TCA_TBF_PAD))
5218c2ecf20Sopenharmony_ci		goto nla_put_failure;
5228c2ecf20Sopenharmony_ci
5238c2ecf20Sopenharmony_ci	return nla_nest_end(skb, nest);
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_cinla_put_failure:
5268c2ecf20Sopenharmony_ci	nla_nest_cancel(skb, nest);
5278c2ecf20Sopenharmony_ci	return -1;
5288c2ecf20Sopenharmony_ci}
5298c2ecf20Sopenharmony_ci
5308c2ecf20Sopenharmony_cistatic int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
5318c2ecf20Sopenharmony_ci			  struct sk_buff *skb, struct tcmsg *tcm)
5328c2ecf20Sopenharmony_ci{
5338c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
5348c2ecf20Sopenharmony_ci
5358c2ecf20Sopenharmony_ci	tcm->tcm_handle |= TC_H_MIN(1);
5368c2ecf20Sopenharmony_ci	tcm->tcm_info = q->qdisc->handle;
5378c2ecf20Sopenharmony_ci
5388c2ecf20Sopenharmony_ci	return 0;
5398c2ecf20Sopenharmony_ci}
5408c2ecf20Sopenharmony_ci
5418c2ecf20Sopenharmony_cistatic int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
5428c2ecf20Sopenharmony_ci		     struct Qdisc **old, struct netlink_ext_ack *extack)
5438c2ecf20Sopenharmony_ci{
5448c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
5458c2ecf20Sopenharmony_ci
5468c2ecf20Sopenharmony_ci	if (new == NULL)
5478c2ecf20Sopenharmony_ci		new = &noop_qdisc;
5488c2ecf20Sopenharmony_ci
5498c2ecf20Sopenharmony_ci	*old = qdisc_replace(sch, new, &q->qdisc);
5508c2ecf20Sopenharmony_ci	return 0;
5518c2ecf20Sopenharmony_ci}
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_cistatic struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
5548c2ecf20Sopenharmony_ci{
5558c2ecf20Sopenharmony_ci	struct tbf_sched_data *q = qdisc_priv(sch);
5568c2ecf20Sopenharmony_ci	return q->qdisc;
5578c2ecf20Sopenharmony_ci}
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_cistatic unsigned long tbf_find(struct Qdisc *sch, u32 classid)
5608c2ecf20Sopenharmony_ci{
5618c2ecf20Sopenharmony_ci	return 1;
5628c2ecf20Sopenharmony_ci}
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_cistatic void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
5658c2ecf20Sopenharmony_ci{
5668c2ecf20Sopenharmony_ci	if (!walker->stop) {
5678c2ecf20Sopenharmony_ci		if (walker->count >= walker->skip)
5688c2ecf20Sopenharmony_ci			if (walker->fn(sch, 1, walker) < 0) {
5698c2ecf20Sopenharmony_ci				walker->stop = 1;
5708c2ecf20Sopenharmony_ci				return;
5718c2ecf20Sopenharmony_ci			}
5728c2ecf20Sopenharmony_ci		walker->count++;
5738c2ecf20Sopenharmony_ci	}
5748c2ecf20Sopenharmony_ci}
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_cistatic const struct Qdisc_class_ops tbf_class_ops = {
5778c2ecf20Sopenharmony_ci	.graft		=	tbf_graft,
5788c2ecf20Sopenharmony_ci	.leaf		=	tbf_leaf,
5798c2ecf20Sopenharmony_ci	.find		=	tbf_find,
5808c2ecf20Sopenharmony_ci	.walk		=	tbf_walk,
5818c2ecf20Sopenharmony_ci	.dump		=	tbf_dump_class,
5828c2ecf20Sopenharmony_ci};
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_cistatic struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
5858c2ecf20Sopenharmony_ci	.next		=	NULL,
5868c2ecf20Sopenharmony_ci	.cl_ops		=	&tbf_class_ops,
5878c2ecf20Sopenharmony_ci	.id		=	"tbf",
5888c2ecf20Sopenharmony_ci	.priv_size	=	sizeof(struct tbf_sched_data),
5898c2ecf20Sopenharmony_ci	.enqueue	=	tbf_enqueue,
5908c2ecf20Sopenharmony_ci	.dequeue	=	tbf_dequeue,
5918c2ecf20Sopenharmony_ci	.peek		=	qdisc_peek_dequeued,
5928c2ecf20Sopenharmony_ci	.init		=	tbf_init,
5938c2ecf20Sopenharmony_ci	.reset		=	tbf_reset,
5948c2ecf20Sopenharmony_ci	.destroy	=	tbf_destroy,
5958c2ecf20Sopenharmony_ci	.change		=	tbf_change,
5968c2ecf20Sopenharmony_ci	.dump		=	tbf_dump,
5978c2ecf20Sopenharmony_ci	.owner		=	THIS_MODULE,
5988c2ecf20Sopenharmony_ci};
5998c2ecf20Sopenharmony_ci
6008c2ecf20Sopenharmony_cistatic int __init tbf_module_init(void)
6018c2ecf20Sopenharmony_ci{
6028c2ecf20Sopenharmony_ci	return register_qdisc(&tbf_qdisc_ops);
6038c2ecf20Sopenharmony_ci}
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_cistatic void __exit tbf_module_exit(void)
6068c2ecf20Sopenharmony_ci{
6078c2ecf20Sopenharmony_ci	unregister_qdisc(&tbf_qdisc_ops);
6088c2ecf20Sopenharmony_ci}
6098c2ecf20Sopenharmony_cimodule_init(tbf_module_init)
6108c2ecf20Sopenharmony_cimodule_exit(tbf_module_exit)
6118c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
612