162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * net/sched/sch_tbf.c Token Bucket Filter queue. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 662306a36Sopenharmony_ci * Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs - 762306a36Sopenharmony_ci * original idea by Martin Devera 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/module.h> 1162306a36Sopenharmony_ci#include <linux/types.h> 1262306a36Sopenharmony_ci#include <linux/kernel.h> 1362306a36Sopenharmony_ci#include <linux/string.h> 1462306a36Sopenharmony_ci#include <linux/errno.h> 1562306a36Sopenharmony_ci#include <linux/skbuff.h> 1662306a36Sopenharmony_ci#include <net/gso.h> 1762306a36Sopenharmony_ci#include <net/netlink.h> 1862306a36Sopenharmony_ci#include <net/sch_generic.h> 1962306a36Sopenharmony_ci#include <net/pkt_cls.h> 2062306a36Sopenharmony_ci#include <net/pkt_sched.h> 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci/* Simple Token Bucket Filter. 2462306a36Sopenharmony_ci ======================================= 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci SOURCE. 2762306a36Sopenharmony_ci ------- 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci None. 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci Description. 3262306a36Sopenharmony_ci ------------ 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci A data flow obeys TBF with rate R and depth B, if for any 3562306a36Sopenharmony_ci time interval t_i...t_f the number of transmitted bits 3662306a36Sopenharmony_ci does not exceed B + R*(t_f-t_i). 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci Packetized version of this definition: 3962306a36Sopenharmony_ci The sequence of packets of sizes s_i served at moments t_i 4062306a36Sopenharmony_ci obeys TBF, if for any i<=k: 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci s_i+....+s_k <= B + R*(t_k - t_i) 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci Algorithm. 4562306a36Sopenharmony_ci ---------- 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci Let N(t_i) be B/R initially and N(t) grow continuously with time as: 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci N(t+delta) = min{B/R, N(t) + delta} 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci If the first packet in queue has length S, it may be 5262306a36Sopenharmony_ci transmitted only at the time t_* when S/R <= N(t_*), 5362306a36Sopenharmony_ci and in this case N(t) jumps: 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci N(t_* + 0) = N(t_* - 0) - S/R. 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci Actually, QoS requires two TBF to be applied to a data stream. 6062306a36Sopenharmony_ci One of them controls steady state burst size, another 6162306a36Sopenharmony_ci one with rate P (peak rate) and depth M (equal to link MTU) 6262306a36Sopenharmony_ci limits bursts at a smaller time scale. 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci It is easy to see that P>R, and B>M. If P is infinity, this double 6562306a36Sopenharmony_ci TBF is equivalent to a single one. 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci When TBF works in reshaping mode, latency is estimated as: 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci lat = max ((L-B)/R, (L-M)/P) 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci NOTES. 7362306a36Sopenharmony_ci ------ 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci If TBF throttles, it starts a watchdog timer, which will wake it up 7662306a36Sopenharmony_ci when it is ready to transmit. 7762306a36Sopenharmony_ci Note that the minimal timer resolution is 1/HZ. 7862306a36Sopenharmony_ci If no new packets arrive during this period, 7962306a36Sopenharmony_ci or if the device is not awaken by EOI for some previous packet, 8062306a36Sopenharmony_ci TBF can stop its activity for 1/HZ. 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci This means, that with depth B, the maximal rate is 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci R_crit = B*HZ 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes. 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci Note that the peak rate TBF is much more tough: with MTU 1500 9062306a36Sopenharmony_ci P_crit = 150Kbytes/sec. So, if you need greater peak 9162306a36Sopenharmony_ci rates, use alpha with HZ=1000 :-) 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci With classful TBF, limit is just kept for backwards compatibility. 9462306a36Sopenharmony_ci It is passed to the default bfifo qdisc - if the inner qdisc is 9562306a36Sopenharmony_ci changed the limit is not effective anymore. 9662306a36Sopenharmony_ci*/ 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cistruct tbf_sched_data { 9962306a36Sopenharmony_ci/* Parameters */ 10062306a36Sopenharmony_ci u32 limit; /* Maximal length of backlog: bytes */ 10162306a36Sopenharmony_ci u32 max_size; 10262306a36Sopenharmony_ci s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ 10362306a36Sopenharmony_ci s64 mtu; 10462306a36Sopenharmony_ci struct psched_ratecfg rate; 10562306a36Sopenharmony_ci struct psched_ratecfg peak; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci/* Variables */ 10862306a36Sopenharmony_ci s64 tokens; /* Current number of B tokens */ 10962306a36Sopenharmony_ci s64 ptokens; /* Current number of P tokens */ 11062306a36Sopenharmony_ci s64 t_c; /* Time check-point */ 11162306a36Sopenharmony_ci struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ 11262306a36Sopenharmony_ci struct qdisc_watchdog watchdog; /* Watchdog timer */ 11362306a36Sopenharmony_ci}; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci/* Time to Length, convert time in ns to length in bytes 11762306a36Sopenharmony_ci * to determinate how many bytes can be sent in given time. 11862306a36Sopenharmony_ci */ 11962306a36Sopenharmony_cistatic u64 psched_ns_t2l(const struct psched_ratecfg *r, 12062306a36Sopenharmony_ci u64 time_in_ns) 12162306a36Sopenharmony_ci{ 12262306a36Sopenharmony_ci /* The formula is : 12362306a36Sopenharmony_ci * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC 12462306a36Sopenharmony_ci */ 12562306a36Sopenharmony_ci u64 len = time_in_ns * r->rate_bytes_ps; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci do_div(len, NSEC_PER_SEC); 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) { 13062306a36Sopenharmony_ci do_div(len, 53); 13162306a36Sopenharmony_ci len = len * 48; 13262306a36Sopenharmony_ci } 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci if (len > r->overhead) 13562306a36Sopenharmony_ci len -= r->overhead; 13662306a36Sopenharmony_ci else 13762306a36Sopenharmony_ci len = 0; 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci return len; 14062306a36Sopenharmony_ci} 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_cistatic void tbf_offload_change(struct Qdisc *sch) 14362306a36Sopenharmony_ci{ 14462306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 14562306a36Sopenharmony_ci struct net_device *dev = qdisc_dev(sch); 14662306a36Sopenharmony_ci struct tc_tbf_qopt_offload qopt; 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 14962306a36Sopenharmony_ci return; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci qopt.command = TC_TBF_REPLACE; 15262306a36Sopenharmony_ci qopt.handle = sch->handle; 15362306a36Sopenharmony_ci qopt.parent = sch->parent; 15462306a36Sopenharmony_ci qopt.replace_params.rate = q->rate; 15562306a36Sopenharmony_ci qopt.replace_params.max_size = q->max_size; 15662306a36Sopenharmony_ci qopt.replace_params.qstats = &sch->qstats; 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); 15962306a36Sopenharmony_ci} 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_cistatic void tbf_offload_destroy(struct Qdisc *sch) 16262306a36Sopenharmony_ci{ 16362306a36Sopenharmony_ci struct net_device *dev = qdisc_dev(sch); 16462306a36Sopenharmony_ci struct tc_tbf_qopt_offload qopt; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 16762306a36Sopenharmony_ci return; 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci qopt.command = TC_TBF_DESTROY; 17062306a36Sopenharmony_ci qopt.handle = sch->handle; 17162306a36Sopenharmony_ci qopt.parent = sch->parent; 17262306a36Sopenharmony_ci dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); 17362306a36Sopenharmony_ci} 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_cistatic int tbf_offload_dump(struct Qdisc *sch) 17662306a36Sopenharmony_ci{ 17762306a36Sopenharmony_ci struct tc_tbf_qopt_offload qopt; 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci qopt.command = TC_TBF_STATS; 18062306a36Sopenharmony_ci qopt.handle = sch->handle; 18162306a36Sopenharmony_ci qopt.parent = sch->parent; 18262306a36Sopenharmony_ci qopt.stats.bstats = &sch->bstats; 18362306a36Sopenharmony_ci qopt.stats.qstats = &sch->qstats; 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt); 18662306a36Sopenharmony_ci} 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_cistatic void tbf_offload_graft(struct Qdisc *sch, struct Qdisc *new, 18962306a36Sopenharmony_ci struct Qdisc *old, struct netlink_ext_ack *extack) 19062306a36Sopenharmony_ci{ 19162306a36Sopenharmony_ci struct tc_tbf_qopt_offload graft_offload = { 19262306a36Sopenharmony_ci .handle = sch->handle, 19362306a36Sopenharmony_ci .parent = sch->parent, 19462306a36Sopenharmony_ci .child_handle = new->handle, 19562306a36Sopenharmony_ci .command = TC_TBF_GRAFT, 19662306a36Sopenharmony_ci }; 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old, 19962306a36Sopenharmony_ci TC_SETUP_QDISC_TBF, &graft_offload, extack); 20062306a36Sopenharmony_ci} 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci/* GSO packet is too big, segment it so that tbf can transmit 20362306a36Sopenharmony_ci * each segment in time 20462306a36Sopenharmony_ci */ 20562306a36Sopenharmony_cistatic int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, 20662306a36Sopenharmony_ci struct sk_buff **to_free) 20762306a36Sopenharmony_ci{ 20862306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 20962306a36Sopenharmony_ci struct sk_buff *segs, *nskb; 21062306a36Sopenharmony_ci netdev_features_t features = netif_skb_features(skb); 21162306a36Sopenharmony_ci unsigned int len = 0, prev_len = qdisc_pkt_len(skb); 21262306a36Sopenharmony_ci int ret, nb; 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci if (IS_ERR_OR_NULL(segs)) 21762306a36Sopenharmony_ci return qdisc_drop(skb, sch, to_free); 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci nb = 0; 22062306a36Sopenharmony_ci skb_list_walk_safe(segs, segs, nskb) { 22162306a36Sopenharmony_ci skb_mark_not_on_list(segs); 22262306a36Sopenharmony_ci qdisc_skb_cb(segs)->pkt_len = segs->len; 22362306a36Sopenharmony_ci len += segs->len; 22462306a36Sopenharmony_ci ret = qdisc_enqueue(segs, q->qdisc, to_free); 22562306a36Sopenharmony_ci if (ret != NET_XMIT_SUCCESS) { 22662306a36Sopenharmony_ci if (net_xmit_drop_count(ret)) 22762306a36Sopenharmony_ci qdisc_qstats_drop(sch); 22862306a36Sopenharmony_ci } else { 22962306a36Sopenharmony_ci nb++; 23062306a36Sopenharmony_ci } 23162306a36Sopenharmony_ci } 23262306a36Sopenharmony_ci sch->q.qlen += nb; 23362306a36Sopenharmony_ci if (nb > 1) 23462306a36Sopenharmony_ci qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); 23562306a36Sopenharmony_ci consume_skb(skb); 23662306a36Sopenharmony_ci return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_cistatic int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, 24062306a36Sopenharmony_ci struct sk_buff **to_free) 24162306a36Sopenharmony_ci{ 24262306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 24362306a36Sopenharmony_ci unsigned int len = qdisc_pkt_len(skb); 24462306a36Sopenharmony_ci int ret; 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci if (qdisc_pkt_len(skb) > q->max_size) { 24762306a36Sopenharmony_ci if (skb_is_gso(skb) && 24862306a36Sopenharmony_ci skb_gso_validate_mac_len(skb, q->max_size)) 24962306a36Sopenharmony_ci return tbf_segment(skb, sch, to_free); 25062306a36Sopenharmony_ci return qdisc_drop(skb, sch, to_free); 25162306a36Sopenharmony_ci } 25262306a36Sopenharmony_ci ret = qdisc_enqueue(skb, q->qdisc, to_free); 25362306a36Sopenharmony_ci if (ret != NET_XMIT_SUCCESS) { 25462306a36Sopenharmony_ci if (net_xmit_drop_count(ret)) 25562306a36Sopenharmony_ci qdisc_qstats_drop(sch); 25662306a36Sopenharmony_ci return ret; 25762306a36Sopenharmony_ci } 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci sch->qstats.backlog += len; 26062306a36Sopenharmony_ci sch->q.qlen++; 26162306a36Sopenharmony_ci return NET_XMIT_SUCCESS; 26262306a36Sopenharmony_ci} 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_cistatic bool tbf_peak_present(const struct tbf_sched_data *q) 26562306a36Sopenharmony_ci{ 26662306a36Sopenharmony_ci return q->peak.rate_bytes_ps; 26762306a36Sopenharmony_ci} 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_cistatic struct sk_buff *tbf_dequeue(struct Qdisc *sch) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 27262306a36Sopenharmony_ci struct sk_buff *skb; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci skb = q->qdisc->ops->peek(q->qdisc); 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci if (skb) { 27762306a36Sopenharmony_ci s64 now; 27862306a36Sopenharmony_ci s64 toks; 27962306a36Sopenharmony_ci s64 ptoks = 0; 28062306a36Sopenharmony_ci unsigned int len = qdisc_pkt_len(skb); 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci now = ktime_get_ns(); 28362306a36Sopenharmony_ci toks = min_t(s64, now - q->t_c, q->buffer); 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci if (tbf_peak_present(q)) { 28662306a36Sopenharmony_ci ptoks = toks + q->ptokens; 28762306a36Sopenharmony_ci if (ptoks > q->mtu) 28862306a36Sopenharmony_ci ptoks = q->mtu; 28962306a36Sopenharmony_ci ptoks -= (s64) psched_l2t_ns(&q->peak, len); 29062306a36Sopenharmony_ci } 29162306a36Sopenharmony_ci toks += q->tokens; 29262306a36Sopenharmony_ci if (toks > q->buffer) 29362306a36Sopenharmony_ci toks = q->buffer; 29462306a36Sopenharmony_ci toks -= (s64) psched_l2t_ns(&q->rate, len); 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci if ((toks|ptoks) >= 0) { 29762306a36Sopenharmony_ci skb = qdisc_dequeue_peeked(q->qdisc); 29862306a36Sopenharmony_ci if (unlikely(!skb)) 29962306a36Sopenharmony_ci return NULL; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci q->t_c = now; 30262306a36Sopenharmony_ci q->tokens = toks; 30362306a36Sopenharmony_ci q->ptokens = ptoks; 30462306a36Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 30562306a36Sopenharmony_ci sch->q.qlen--; 30662306a36Sopenharmony_ci qdisc_bstats_update(sch, skb); 30762306a36Sopenharmony_ci return skb; 30862306a36Sopenharmony_ci } 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci qdisc_watchdog_schedule_ns(&q->watchdog, 31162306a36Sopenharmony_ci now + max_t(long, -toks, -ptoks)); 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci /* Maybe we have a shorter packet in the queue, 31462306a36Sopenharmony_ci which can be sent now. It sounds cool, 31562306a36Sopenharmony_ci but, however, this is wrong in principle. 31662306a36Sopenharmony_ci We MUST NOT reorder packets under these circumstances. 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci Really, if we split the flow into independent 31962306a36Sopenharmony_ci subflows, it would be a very good solution. 32062306a36Sopenharmony_ci This is the main idea of all FQ algorithms 32162306a36Sopenharmony_ci (cf. CSZ, HPFQ, HFSC) 32262306a36Sopenharmony_ci */ 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci qdisc_qstats_overlimit(sch); 32562306a36Sopenharmony_ci } 32662306a36Sopenharmony_ci return NULL; 32762306a36Sopenharmony_ci} 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_cistatic void tbf_reset(struct Qdisc *sch) 33062306a36Sopenharmony_ci{ 33162306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci qdisc_reset(q->qdisc); 33462306a36Sopenharmony_ci q->t_c = ktime_get_ns(); 33562306a36Sopenharmony_ci q->tokens = q->buffer; 33662306a36Sopenharmony_ci q->ptokens = q->mtu; 33762306a36Sopenharmony_ci qdisc_watchdog_cancel(&q->watchdog); 33862306a36Sopenharmony_ci} 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_cistatic const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { 34162306a36Sopenharmony_ci [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) }, 34262306a36Sopenharmony_ci [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 34362306a36Sopenharmony_ci [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 34462306a36Sopenharmony_ci [TCA_TBF_RATE64] = { .type = NLA_U64 }, 34562306a36Sopenharmony_ci [TCA_TBF_PRATE64] = { .type = NLA_U64 }, 34662306a36Sopenharmony_ci [TCA_TBF_BURST] = { .type = NLA_U32 }, 34762306a36Sopenharmony_ci [TCA_TBF_PBURST] = { .type = NLA_U32 }, 34862306a36Sopenharmony_ci}; 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_cistatic int tbf_change(struct Qdisc *sch, struct nlattr *opt, 35162306a36Sopenharmony_ci struct netlink_ext_ack *extack) 35262306a36Sopenharmony_ci{ 35362306a36Sopenharmony_ci int err; 35462306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 35562306a36Sopenharmony_ci struct nlattr *tb[TCA_TBF_MAX + 1]; 35662306a36Sopenharmony_ci struct tc_tbf_qopt *qopt; 35762306a36Sopenharmony_ci struct Qdisc *child = NULL; 35862306a36Sopenharmony_ci struct Qdisc *old = NULL; 35962306a36Sopenharmony_ci struct psched_ratecfg rate; 36062306a36Sopenharmony_ci struct psched_ratecfg peak; 36162306a36Sopenharmony_ci u64 max_size; 36262306a36Sopenharmony_ci s64 buffer, mtu; 36362306a36Sopenharmony_ci u64 rate64 = 0, prate64 = 0; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy, 36662306a36Sopenharmony_ci NULL); 36762306a36Sopenharmony_ci if (err < 0) 36862306a36Sopenharmony_ci return err; 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci err = -EINVAL; 37162306a36Sopenharmony_ci if (tb[TCA_TBF_PARMS] == NULL) 37262306a36Sopenharmony_ci goto done; 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci qopt = nla_data(tb[TCA_TBF_PARMS]); 37562306a36Sopenharmony_ci if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) 37662306a36Sopenharmony_ci qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, 37762306a36Sopenharmony_ci tb[TCA_TBF_RTAB], 37862306a36Sopenharmony_ci NULL)); 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) 38162306a36Sopenharmony_ci qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, 38262306a36Sopenharmony_ci tb[TCA_TBF_PTAB], 38362306a36Sopenharmony_ci NULL)); 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); 38662306a36Sopenharmony_ci mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci if (tb[TCA_TBF_RATE64]) 38962306a36Sopenharmony_ci rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); 39062306a36Sopenharmony_ci psched_ratecfg_precompute(&rate, &qopt->rate, rate64); 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci if (tb[TCA_TBF_BURST]) { 39362306a36Sopenharmony_ci max_size = nla_get_u32(tb[TCA_TBF_BURST]); 39462306a36Sopenharmony_ci buffer = psched_l2t_ns(&rate, max_size); 39562306a36Sopenharmony_ci } else { 39662306a36Sopenharmony_ci max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); 39762306a36Sopenharmony_ci } 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci if (qopt->peakrate.rate) { 40062306a36Sopenharmony_ci if (tb[TCA_TBF_PRATE64]) 40162306a36Sopenharmony_ci prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); 40262306a36Sopenharmony_ci psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); 40362306a36Sopenharmony_ci if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { 40462306a36Sopenharmony_ci pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", 40562306a36Sopenharmony_ci peak.rate_bytes_ps, rate.rate_bytes_ps); 40662306a36Sopenharmony_ci err = -EINVAL; 40762306a36Sopenharmony_ci goto done; 40862306a36Sopenharmony_ci } 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci if (tb[TCA_TBF_PBURST]) { 41162306a36Sopenharmony_ci u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]); 41262306a36Sopenharmony_ci max_size = min_t(u32, max_size, pburst); 41362306a36Sopenharmony_ci mtu = psched_l2t_ns(&peak, pburst); 41462306a36Sopenharmony_ci } else { 41562306a36Sopenharmony_ci max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); 41662306a36Sopenharmony_ci } 41762306a36Sopenharmony_ci } else { 41862306a36Sopenharmony_ci memset(&peak, 0, sizeof(peak)); 41962306a36Sopenharmony_ci } 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci if (max_size < psched_mtu(qdisc_dev(sch))) 42262306a36Sopenharmony_ci pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", 42362306a36Sopenharmony_ci max_size, qdisc_dev(sch)->name, 42462306a36Sopenharmony_ci psched_mtu(qdisc_dev(sch))); 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci if (!max_size) { 42762306a36Sopenharmony_ci err = -EINVAL; 42862306a36Sopenharmony_ci goto done; 42962306a36Sopenharmony_ci } 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci if (q->qdisc != &noop_qdisc) { 43262306a36Sopenharmony_ci err = fifo_set_limit(q->qdisc, qopt->limit); 43362306a36Sopenharmony_ci if (err) 43462306a36Sopenharmony_ci goto done; 43562306a36Sopenharmony_ci } else if (qopt->limit > 0) { 43662306a36Sopenharmony_ci child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit, 43762306a36Sopenharmony_ci extack); 43862306a36Sopenharmony_ci if (IS_ERR(child)) { 43962306a36Sopenharmony_ci err = PTR_ERR(child); 44062306a36Sopenharmony_ci goto done; 44162306a36Sopenharmony_ci } 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci /* child is fifo, no need to check for noop_qdisc */ 44462306a36Sopenharmony_ci qdisc_hash_add(child, true); 44562306a36Sopenharmony_ci } 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci sch_tree_lock(sch); 44862306a36Sopenharmony_ci if (child) { 44962306a36Sopenharmony_ci qdisc_tree_flush_backlog(q->qdisc); 45062306a36Sopenharmony_ci old = q->qdisc; 45162306a36Sopenharmony_ci q->qdisc = child; 45262306a36Sopenharmony_ci } 45362306a36Sopenharmony_ci q->limit = qopt->limit; 45462306a36Sopenharmony_ci if (tb[TCA_TBF_PBURST]) 45562306a36Sopenharmony_ci q->mtu = mtu; 45662306a36Sopenharmony_ci else 45762306a36Sopenharmony_ci q->mtu = PSCHED_TICKS2NS(qopt->mtu); 45862306a36Sopenharmony_ci q->max_size = max_size; 45962306a36Sopenharmony_ci if (tb[TCA_TBF_BURST]) 46062306a36Sopenharmony_ci q->buffer = buffer; 46162306a36Sopenharmony_ci else 46262306a36Sopenharmony_ci q->buffer = PSCHED_TICKS2NS(qopt->buffer); 46362306a36Sopenharmony_ci q->tokens = q->buffer; 46462306a36Sopenharmony_ci q->ptokens = q->mtu; 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); 46762306a36Sopenharmony_ci memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci sch_tree_unlock(sch); 47062306a36Sopenharmony_ci qdisc_put(old); 47162306a36Sopenharmony_ci err = 0; 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci tbf_offload_change(sch); 47462306a36Sopenharmony_cidone: 47562306a36Sopenharmony_ci return err; 47662306a36Sopenharmony_ci} 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_cistatic int tbf_init(struct Qdisc *sch, struct nlattr *opt, 47962306a36Sopenharmony_ci struct netlink_ext_ack *extack) 48062306a36Sopenharmony_ci{ 48162306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci qdisc_watchdog_init(&q->watchdog, sch); 48462306a36Sopenharmony_ci q->qdisc = &noop_qdisc; 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci if (!opt) 48762306a36Sopenharmony_ci return -EINVAL; 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci q->t_c = ktime_get_ns(); 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci return tbf_change(sch, opt, extack); 49262306a36Sopenharmony_ci} 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_cistatic void tbf_destroy(struct Qdisc *sch) 49562306a36Sopenharmony_ci{ 49662306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci qdisc_watchdog_cancel(&q->watchdog); 49962306a36Sopenharmony_ci tbf_offload_destroy(sch); 50062306a36Sopenharmony_ci qdisc_put(q->qdisc); 50162306a36Sopenharmony_ci} 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_cistatic int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) 50462306a36Sopenharmony_ci{ 50562306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 50662306a36Sopenharmony_ci struct nlattr *nest; 50762306a36Sopenharmony_ci struct tc_tbf_qopt opt; 50862306a36Sopenharmony_ci int err; 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci err = tbf_offload_dump(sch); 51162306a36Sopenharmony_ci if (err) 51262306a36Sopenharmony_ci return err; 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci nest = nla_nest_start_noflag(skb, TCA_OPTIONS); 51562306a36Sopenharmony_ci if (nest == NULL) 51662306a36Sopenharmony_ci goto nla_put_failure; 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci opt.limit = q->limit; 51962306a36Sopenharmony_ci psched_ratecfg_getrate(&opt.rate, &q->rate); 52062306a36Sopenharmony_ci if (tbf_peak_present(q)) 52162306a36Sopenharmony_ci psched_ratecfg_getrate(&opt.peakrate, &q->peak); 52262306a36Sopenharmony_ci else 52362306a36Sopenharmony_ci memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 52462306a36Sopenharmony_ci opt.mtu = PSCHED_NS2TICKS(q->mtu); 52562306a36Sopenharmony_ci opt.buffer = PSCHED_NS2TICKS(q->buffer); 52662306a36Sopenharmony_ci if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) 52762306a36Sopenharmony_ci goto nla_put_failure; 52862306a36Sopenharmony_ci if (q->rate.rate_bytes_ps >= (1ULL << 32) && 52962306a36Sopenharmony_ci nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps, 53062306a36Sopenharmony_ci TCA_TBF_PAD)) 53162306a36Sopenharmony_ci goto nla_put_failure; 53262306a36Sopenharmony_ci if (tbf_peak_present(q) && 53362306a36Sopenharmony_ci q->peak.rate_bytes_ps >= (1ULL << 32) && 53462306a36Sopenharmony_ci nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps, 53562306a36Sopenharmony_ci TCA_TBF_PAD)) 53662306a36Sopenharmony_ci goto nla_put_failure; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci return nla_nest_end(skb, nest); 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_cinla_put_failure: 54162306a36Sopenharmony_ci nla_nest_cancel(skb, nest); 54262306a36Sopenharmony_ci return -1; 54362306a36Sopenharmony_ci} 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_cistatic int tbf_dump_class(struct Qdisc *sch, unsigned long cl, 54662306a36Sopenharmony_ci struct sk_buff *skb, struct tcmsg *tcm) 54762306a36Sopenharmony_ci{ 54862306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci tcm->tcm_handle |= TC_H_MIN(1); 55162306a36Sopenharmony_ci tcm->tcm_info = q->qdisc->handle; 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci return 0; 55462306a36Sopenharmony_ci} 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_cistatic int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 55762306a36Sopenharmony_ci struct Qdisc **old, struct netlink_ext_ack *extack) 55862306a36Sopenharmony_ci{ 55962306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci if (new == NULL) 56262306a36Sopenharmony_ci new = &noop_qdisc; 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci *old = qdisc_replace(sch, new, &q->qdisc); 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci tbf_offload_graft(sch, new, *old, extack); 56762306a36Sopenharmony_ci return 0; 56862306a36Sopenharmony_ci} 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_cistatic struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg) 57162306a36Sopenharmony_ci{ 57262306a36Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 57362306a36Sopenharmony_ci return q->qdisc; 57462306a36Sopenharmony_ci} 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_cistatic unsigned long tbf_find(struct Qdisc *sch, u32 classid) 57762306a36Sopenharmony_ci{ 57862306a36Sopenharmony_ci return 1; 57962306a36Sopenharmony_ci} 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_cistatic void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) 58262306a36Sopenharmony_ci{ 58362306a36Sopenharmony_ci if (!walker->stop) { 58462306a36Sopenharmony_ci tc_qdisc_stats_dump(sch, 1, walker); 58562306a36Sopenharmony_ci } 58662306a36Sopenharmony_ci} 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_cistatic const struct Qdisc_class_ops tbf_class_ops = { 58962306a36Sopenharmony_ci .graft = tbf_graft, 59062306a36Sopenharmony_ci .leaf = tbf_leaf, 59162306a36Sopenharmony_ci .find = tbf_find, 59262306a36Sopenharmony_ci .walk = tbf_walk, 59362306a36Sopenharmony_ci .dump = tbf_dump_class, 59462306a36Sopenharmony_ci}; 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_cistatic struct Qdisc_ops tbf_qdisc_ops __read_mostly = { 59762306a36Sopenharmony_ci .next = NULL, 59862306a36Sopenharmony_ci .cl_ops = &tbf_class_ops, 59962306a36Sopenharmony_ci .id = "tbf", 60062306a36Sopenharmony_ci .priv_size = sizeof(struct tbf_sched_data), 60162306a36Sopenharmony_ci .enqueue = tbf_enqueue, 60262306a36Sopenharmony_ci .dequeue = tbf_dequeue, 60362306a36Sopenharmony_ci .peek = qdisc_peek_dequeued, 60462306a36Sopenharmony_ci .init = tbf_init, 60562306a36Sopenharmony_ci .reset = tbf_reset, 60662306a36Sopenharmony_ci .destroy = tbf_destroy, 60762306a36Sopenharmony_ci .change = tbf_change, 60862306a36Sopenharmony_ci .dump = tbf_dump, 60962306a36Sopenharmony_ci .owner = THIS_MODULE, 61062306a36Sopenharmony_ci}; 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_cistatic int __init tbf_module_init(void) 61362306a36Sopenharmony_ci{ 61462306a36Sopenharmony_ci return register_qdisc(&tbf_qdisc_ops); 61562306a36Sopenharmony_ci} 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_cistatic void __exit tbf_module_exit(void) 61862306a36Sopenharmony_ci{ 61962306a36Sopenharmony_ci unregister_qdisc(&tbf_qdisc_ops); 62062306a36Sopenharmony_ci} 62162306a36Sopenharmony_cimodule_init(tbf_module_init) 62262306a36Sopenharmony_cimodule_exit(tbf_module_exit) 62362306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 624