18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * net/sched/sch_tbf.c Token Bucket Filter queue. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 68c2ecf20Sopenharmony_ci * Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs - 78c2ecf20Sopenharmony_ci * original idea by Martin Devera 88c2ecf20Sopenharmony_ci */ 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#include <linux/module.h> 118c2ecf20Sopenharmony_ci#include <linux/types.h> 128c2ecf20Sopenharmony_ci#include <linux/kernel.h> 138c2ecf20Sopenharmony_ci#include <linux/string.h> 148c2ecf20Sopenharmony_ci#include <linux/errno.h> 158c2ecf20Sopenharmony_ci#include <linux/skbuff.h> 168c2ecf20Sopenharmony_ci#include <net/netlink.h> 178c2ecf20Sopenharmony_ci#include <net/sch_generic.h> 188c2ecf20Sopenharmony_ci#include <net/pkt_cls.h> 198c2ecf20Sopenharmony_ci#include <net/pkt_sched.h> 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_ci/* Simple Token Bucket Filter. 238c2ecf20Sopenharmony_ci ======================================= 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci SOURCE. 268c2ecf20Sopenharmony_ci ------- 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci None. 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci Description. 318c2ecf20Sopenharmony_ci ------------ 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_ci A data flow obeys TBF with rate R and depth B, if for any 348c2ecf20Sopenharmony_ci time interval t_i...t_f the number of transmitted bits 358c2ecf20Sopenharmony_ci does not exceed B + R*(t_f-t_i). 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci Packetized version of this definition: 388c2ecf20Sopenharmony_ci The sequence of packets of sizes s_i served at moments t_i 398c2ecf20Sopenharmony_ci obeys TBF, if for any i<=k: 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci s_i+....+s_k <= B + R*(t_k - t_i) 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci Algorithm. 448c2ecf20Sopenharmony_ci ---------- 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci Let N(t_i) be B/R initially and N(t) grow continuously with time as: 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci N(t+delta) = min{B/R, N(t) + delta} 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci If the first packet in queue has length S, it may be 518c2ecf20Sopenharmony_ci transmitted only at the time t_* when S/R <= N(t_*), 528c2ecf20Sopenharmony_ci and in this case N(t) jumps: 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci N(t_* + 0) = N(t_* - 0) - S/R. 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci Actually, QoS requires two TBF to be applied to a data stream. 598c2ecf20Sopenharmony_ci One of them controls steady state burst size, another 608c2ecf20Sopenharmony_ci one with rate P (peak rate) and depth M (equal to link MTU) 618c2ecf20Sopenharmony_ci limits bursts at a smaller time scale. 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci It is easy to see that P>R, and B>M. If P is infinity, this double 648c2ecf20Sopenharmony_ci TBF is equivalent to a single one. 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci When TBF works in reshaping mode, latency is estimated as: 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci lat = max ((L-B)/R, (L-M)/P) 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci NOTES. 728c2ecf20Sopenharmony_ci ------ 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci If TBF throttles, it starts a watchdog timer, which will wake it up 758c2ecf20Sopenharmony_ci when it is ready to transmit. 768c2ecf20Sopenharmony_ci Note that the minimal timer resolution is 1/HZ. 778c2ecf20Sopenharmony_ci If no new packets arrive during this period, 788c2ecf20Sopenharmony_ci or if the device is not awaken by EOI for some previous packet, 798c2ecf20Sopenharmony_ci TBF can stop its activity for 1/HZ. 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci This means, that with depth B, the maximal rate is 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci R_crit = B*HZ 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes. 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci Note that the peak rate TBF is much more tough: with MTU 1500 898c2ecf20Sopenharmony_ci P_crit = 150Kbytes/sec. So, if you need greater peak 908c2ecf20Sopenharmony_ci rates, use alpha with HZ=1000 :-) 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci With classful TBF, limit is just kept for backwards compatibility. 938c2ecf20Sopenharmony_ci It is passed to the default bfifo qdisc - if the inner qdisc is 948c2ecf20Sopenharmony_ci changed the limit is not effective anymore. 958c2ecf20Sopenharmony_ci*/ 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_cistruct tbf_sched_data { 988c2ecf20Sopenharmony_ci/* Parameters */ 998c2ecf20Sopenharmony_ci u32 limit; /* Maximal length of backlog: bytes */ 1008c2ecf20Sopenharmony_ci u32 max_size; 1018c2ecf20Sopenharmony_ci s64 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ 1028c2ecf20Sopenharmony_ci s64 mtu; 1038c2ecf20Sopenharmony_ci struct psched_ratecfg rate; 1048c2ecf20Sopenharmony_ci struct psched_ratecfg peak; 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci/* Variables */ 1078c2ecf20Sopenharmony_ci s64 tokens; /* Current number of B tokens */ 1088c2ecf20Sopenharmony_ci s64 ptokens; /* Current number of P tokens */ 1098c2ecf20Sopenharmony_ci s64 t_c; /* Time check-point */ 1108c2ecf20Sopenharmony_ci struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ 1118c2ecf20Sopenharmony_ci struct qdisc_watchdog watchdog; /* Watchdog timer */ 1128c2ecf20Sopenharmony_ci}; 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci/* Time to Length, convert time in ns to length in bytes 1168c2ecf20Sopenharmony_ci * to determinate how many bytes can be sent in given time. 1178c2ecf20Sopenharmony_ci */ 1188c2ecf20Sopenharmony_cistatic u64 psched_ns_t2l(const struct psched_ratecfg *r, 1198c2ecf20Sopenharmony_ci u64 time_in_ns) 1208c2ecf20Sopenharmony_ci{ 1218c2ecf20Sopenharmony_ci /* The formula is : 1228c2ecf20Sopenharmony_ci * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC 1238c2ecf20Sopenharmony_ci */ 1248c2ecf20Sopenharmony_ci u64 len = time_in_ns * r->rate_bytes_ps; 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci do_div(len, NSEC_PER_SEC); 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) { 1298c2ecf20Sopenharmony_ci do_div(len, 53); 1308c2ecf20Sopenharmony_ci len = len * 48; 1318c2ecf20Sopenharmony_ci } 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci if (len > r->overhead) 1348c2ecf20Sopenharmony_ci len -= r->overhead; 1358c2ecf20Sopenharmony_ci else 1368c2ecf20Sopenharmony_ci len = 0; 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci return len; 1398c2ecf20Sopenharmony_ci} 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_cistatic void tbf_offload_change(struct Qdisc *sch) 1428c2ecf20Sopenharmony_ci{ 1438c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 1448c2ecf20Sopenharmony_ci struct net_device *dev = qdisc_dev(sch); 1458c2ecf20Sopenharmony_ci struct tc_tbf_qopt_offload qopt; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 1488c2ecf20Sopenharmony_ci return; 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci qopt.command = TC_TBF_REPLACE; 1518c2ecf20Sopenharmony_ci qopt.handle = sch->handle; 1528c2ecf20Sopenharmony_ci qopt.parent = sch->parent; 1538c2ecf20Sopenharmony_ci qopt.replace_params.rate = q->rate; 1548c2ecf20Sopenharmony_ci qopt.replace_params.max_size = q->max_size; 1558c2ecf20Sopenharmony_ci qopt.replace_params.qstats = &sch->qstats; 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); 1588c2ecf20Sopenharmony_ci} 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_cistatic void tbf_offload_destroy(struct Qdisc *sch) 1618c2ecf20Sopenharmony_ci{ 1628c2ecf20Sopenharmony_ci struct net_device *dev = qdisc_dev(sch); 1638c2ecf20Sopenharmony_ci struct tc_tbf_qopt_offload qopt; 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 1668c2ecf20Sopenharmony_ci return; 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci qopt.command = TC_TBF_DESTROY; 1698c2ecf20Sopenharmony_ci qopt.handle = sch->handle; 1708c2ecf20Sopenharmony_ci qopt.parent = sch->parent; 1718c2ecf20Sopenharmony_ci dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); 1728c2ecf20Sopenharmony_ci} 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_cistatic int tbf_offload_dump(struct Qdisc *sch) 1758c2ecf20Sopenharmony_ci{ 1768c2ecf20Sopenharmony_ci struct tc_tbf_qopt_offload qopt; 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci qopt.command = TC_TBF_STATS; 1798c2ecf20Sopenharmony_ci qopt.handle = sch->handle; 1808c2ecf20Sopenharmony_ci qopt.parent = sch->parent; 1818c2ecf20Sopenharmony_ci qopt.stats.bstats = &sch->bstats; 1828c2ecf20Sopenharmony_ci qopt.stats.qstats = &sch->qstats; 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt); 1858c2ecf20Sopenharmony_ci} 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci/* GSO packet is too big, segment it so that tbf can transmit 1888c2ecf20Sopenharmony_ci * each segment in time 1898c2ecf20Sopenharmony_ci */ 1908c2ecf20Sopenharmony_cistatic int tbf_segment(struct sk_buff *skb, struct Qdisc *sch, 1918c2ecf20Sopenharmony_ci struct sk_buff **to_free) 1928c2ecf20Sopenharmony_ci{ 1938c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 1948c2ecf20Sopenharmony_ci struct sk_buff *segs, *nskb; 1958c2ecf20Sopenharmony_ci netdev_features_t features = netif_skb_features(skb); 1968c2ecf20Sopenharmony_ci unsigned int len = 0, prev_len = qdisc_pkt_len(skb); 1978c2ecf20Sopenharmony_ci int ret, nb; 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci if (IS_ERR_OR_NULL(segs)) 2028c2ecf20Sopenharmony_ci return qdisc_drop(skb, sch, to_free); 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci nb = 0; 2058c2ecf20Sopenharmony_ci skb_list_walk_safe(segs, segs, nskb) { 2068c2ecf20Sopenharmony_ci skb_mark_not_on_list(segs); 2078c2ecf20Sopenharmony_ci qdisc_skb_cb(segs)->pkt_len = segs->len; 2088c2ecf20Sopenharmony_ci len += segs->len; 2098c2ecf20Sopenharmony_ci ret = qdisc_enqueue(segs, q->qdisc, to_free); 2108c2ecf20Sopenharmony_ci if (ret != NET_XMIT_SUCCESS) { 2118c2ecf20Sopenharmony_ci if (net_xmit_drop_count(ret)) 2128c2ecf20Sopenharmony_ci qdisc_qstats_drop(sch); 2138c2ecf20Sopenharmony_ci } else { 2148c2ecf20Sopenharmony_ci nb++; 2158c2ecf20Sopenharmony_ci } 2168c2ecf20Sopenharmony_ci } 2178c2ecf20Sopenharmony_ci sch->q.qlen += nb; 2188c2ecf20Sopenharmony_ci if (nb > 1) 2198c2ecf20Sopenharmony_ci qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); 2208c2ecf20Sopenharmony_ci consume_skb(skb); 2218c2ecf20Sopenharmony_ci return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; 2228c2ecf20Sopenharmony_ci} 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_cistatic int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, 2258c2ecf20Sopenharmony_ci struct sk_buff **to_free) 2268c2ecf20Sopenharmony_ci{ 2278c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 2288c2ecf20Sopenharmony_ci unsigned int len = qdisc_pkt_len(skb); 2298c2ecf20Sopenharmony_ci int ret; 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci if (qdisc_pkt_len(skb) > q->max_size) { 2328c2ecf20Sopenharmony_ci if (skb_is_gso(skb) && 2338c2ecf20Sopenharmony_ci skb_gso_validate_mac_len(skb, q->max_size)) 2348c2ecf20Sopenharmony_ci return tbf_segment(skb, sch, to_free); 2358c2ecf20Sopenharmony_ci return qdisc_drop(skb, sch, to_free); 2368c2ecf20Sopenharmony_ci } 2378c2ecf20Sopenharmony_ci ret = qdisc_enqueue(skb, q->qdisc, to_free); 2388c2ecf20Sopenharmony_ci if (ret != NET_XMIT_SUCCESS) { 2398c2ecf20Sopenharmony_ci if (net_xmit_drop_count(ret)) 2408c2ecf20Sopenharmony_ci qdisc_qstats_drop(sch); 2418c2ecf20Sopenharmony_ci return ret; 2428c2ecf20Sopenharmony_ci } 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci sch->qstats.backlog += len; 2458c2ecf20Sopenharmony_ci sch->q.qlen++; 2468c2ecf20Sopenharmony_ci return NET_XMIT_SUCCESS; 2478c2ecf20Sopenharmony_ci} 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_cistatic bool tbf_peak_present(const struct tbf_sched_data *q) 2508c2ecf20Sopenharmony_ci{ 2518c2ecf20Sopenharmony_ci return q->peak.rate_bytes_ps; 2528c2ecf20Sopenharmony_ci} 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_cistatic struct sk_buff *tbf_dequeue(struct Qdisc *sch) 2558c2ecf20Sopenharmony_ci{ 2568c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 2578c2ecf20Sopenharmony_ci struct sk_buff *skb; 2588c2ecf20Sopenharmony_ci 2598c2ecf20Sopenharmony_ci skb = q->qdisc->ops->peek(q->qdisc); 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci if (skb) { 2628c2ecf20Sopenharmony_ci s64 now; 2638c2ecf20Sopenharmony_ci s64 toks; 2648c2ecf20Sopenharmony_ci s64 ptoks = 0; 2658c2ecf20Sopenharmony_ci unsigned int len = qdisc_pkt_len(skb); 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci now = ktime_get_ns(); 2688c2ecf20Sopenharmony_ci toks = min_t(s64, now - q->t_c, q->buffer); 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_ci if (tbf_peak_present(q)) { 2718c2ecf20Sopenharmony_ci ptoks = toks + q->ptokens; 2728c2ecf20Sopenharmony_ci if (ptoks > q->mtu) 2738c2ecf20Sopenharmony_ci ptoks = q->mtu; 2748c2ecf20Sopenharmony_ci ptoks -= (s64) psched_l2t_ns(&q->peak, len); 2758c2ecf20Sopenharmony_ci } 2768c2ecf20Sopenharmony_ci toks += q->tokens; 2778c2ecf20Sopenharmony_ci if (toks > q->buffer) 2788c2ecf20Sopenharmony_ci toks = q->buffer; 2798c2ecf20Sopenharmony_ci toks -= (s64) psched_l2t_ns(&q->rate, len); 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_ci if ((toks|ptoks) >= 0) { 2828c2ecf20Sopenharmony_ci skb = qdisc_dequeue_peeked(q->qdisc); 2838c2ecf20Sopenharmony_ci if (unlikely(!skb)) 2848c2ecf20Sopenharmony_ci return NULL; 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci q->t_c = now; 2878c2ecf20Sopenharmony_ci q->tokens = toks; 2888c2ecf20Sopenharmony_ci q->ptokens = ptoks; 2898c2ecf20Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 2908c2ecf20Sopenharmony_ci sch->q.qlen--; 2918c2ecf20Sopenharmony_ci qdisc_bstats_update(sch, skb); 2928c2ecf20Sopenharmony_ci return skb; 2938c2ecf20Sopenharmony_ci } 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_ci qdisc_watchdog_schedule_ns(&q->watchdog, 2968c2ecf20Sopenharmony_ci now + max_t(long, -toks, -ptoks)); 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci /* Maybe we have a shorter packet in the queue, 2998c2ecf20Sopenharmony_ci which can be sent now. It sounds cool, 3008c2ecf20Sopenharmony_ci but, however, this is wrong in principle. 3018c2ecf20Sopenharmony_ci We MUST NOT reorder packets under these circumstances. 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci Really, if we split the flow into independent 3048c2ecf20Sopenharmony_ci subflows, it would be a very good solution. 3058c2ecf20Sopenharmony_ci This is the main idea of all FQ algorithms 3068c2ecf20Sopenharmony_ci (cf. CSZ, HPFQ, HFSC) 3078c2ecf20Sopenharmony_ci */ 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_ci qdisc_qstats_overlimit(sch); 3108c2ecf20Sopenharmony_ci } 3118c2ecf20Sopenharmony_ci return NULL; 3128c2ecf20Sopenharmony_ci} 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_cistatic void tbf_reset(struct Qdisc *sch) 3158c2ecf20Sopenharmony_ci{ 3168c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci qdisc_reset(q->qdisc); 3198c2ecf20Sopenharmony_ci q->t_c = ktime_get_ns(); 3208c2ecf20Sopenharmony_ci q->tokens = q->buffer; 3218c2ecf20Sopenharmony_ci q->ptokens = q->mtu; 3228c2ecf20Sopenharmony_ci qdisc_watchdog_cancel(&q->watchdog); 3238c2ecf20Sopenharmony_ci} 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_cistatic const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { 3268c2ecf20Sopenharmony_ci [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) }, 3278c2ecf20Sopenharmony_ci [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 3288c2ecf20Sopenharmony_ci [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 3298c2ecf20Sopenharmony_ci [TCA_TBF_RATE64] = { .type = NLA_U64 }, 3308c2ecf20Sopenharmony_ci [TCA_TBF_PRATE64] = { .type = NLA_U64 }, 3318c2ecf20Sopenharmony_ci [TCA_TBF_BURST] = { .type = NLA_U32 }, 3328c2ecf20Sopenharmony_ci [TCA_TBF_PBURST] = { .type = NLA_U32 }, 3338c2ecf20Sopenharmony_ci}; 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_cistatic int tbf_change(struct Qdisc *sch, struct nlattr *opt, 3368c2ecf20Sopenharmony_ci struct netlink_ext_ack *extack) 3378c2ecf20Sopenharmony_ci{ 3388c2ecf20Sopenharmony_ci int err; 3398c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 3408c2ecf20Sopenharmony_ci struct nlattr *tb[TCA_TBF_MAX + 1]; 3418c2ecf20Sopenharmony_ci struct tc_tbf_qopt *qopt; 3428c2ecf20Sopenharmony_ci struct Qdisc *child = NULL; 3438c2ecf20Sopenharmony_ci struct Qdisc *old = NULL; 3448c2ecf20Sopenharmony_ci struct psched_ratecfg rate; 3458c2ecf20Sopenharmony_ci struct psched_ratecfg peak; 3468c2ecf20Sopenharmony_ci u64 max_size; 3478c2ecf20Sopenharmony_ci s64 buffer, mtu; 3488c2ecf20Sopenharmony_ci u64 rate64 = 0, prate64 = 0; 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_ci err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy, 3518c2ecf20Sopenharmony_ci NULL); 3528c2ecf20Sopenharmony_ci if (err < 0) 3538c2ecf20Sopenharmony_ci return err; 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci err = -EINVAL; 3568c2ecf20Sopenharmony_ci if (tb[TCA_TBF_PARMS] == NULL) 3578c2ecf20Sopenharmony_ci goto done; 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_ci qopt = nla_data(tb[TCA_TBF_PARMS]); 3608c2ecf20Sopenharmony_ci if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) 3618c2ecf20Sopenharmony_ci qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, 3628c2ecf20Sopenharmony_ci tb[TCA_TBF_RTAB], 3638c2ecf20Sopenharmony_ci NULL)); 3648c2ecf20Sopenharmony_ci 3658c2ecf20Sopenharmony_ci if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) 3668c2ecf20Sopenharmony_ci qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, 3678c2ecf20Sopenharmony_ci tb[TCA_TBF_PTAB], 3688c2ecf20Sopenharmony_ci NULL)); 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ci buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); 3718c2ecf20Sopenharmony_ci mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci if (tb[TCA_TBF_RATE64]) 3748c2ecf20Sopenharmony_ci rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); 3758c2ecf20Sopenharmony_ci psched_ratecfg_precompute(&rate, &qopt->rate, rate64); 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci if (tb[TCA_TBF_BURST]) { 3788c2ecf20Sopenharmony_ci max_size = nla_get_u32(tb[TCA_TBF_BURST]); 3798c2ecf20Sopenharmony_ci buffer = psched_l2t_ns(&rate, max_size); 3808c2ecf20Sopenharmony_ci } else { 3818c2ecf20Sopenharmony_ci max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); 3828c2ecf20Sopenharmony_ci } 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci if (qopt->peakrate.rate) { 3858c2ecf20Sopenharmony_ci if (tb[TCA_TBF_PRATE64]) 3868c2ecf20Sopenharmony_ci prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); 3878c2ecf20Sopenharmony_ci psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); 3888c2ecf20Sopenharmony_ci if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { 3898c2ecf20Sopenharmony_ci pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", 3908c2ecf20Sopenharmony_ci peak.rate_bytes_ps, rate.rate_bytes_ps); 3918c2ecf20Sopenharmony_ci err = -EINVAL; 3928c2ecf20Sopenharmony_ci goto done; 3938c2ecf20Sopenharmony_ci } 3948c2ecf20Sopenharmony_ci 3958c2ecf20Sopenharmony_ci if (tb[TCA_TBF_PBURST]) { 3968c2ecf20Sopenharmony_ci u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]); 3978c2ecf20Sopenharmony_ci max_size = min_t(u32, max_size, pburst); 3988c2ecf20Sopenharmony_ci mtu = psched_l2t_ns(&peak, pburst); 3998c2ecf20Sopenharmony_ci } else { 4008c2ecf20Sopenharmony_ci max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); 4018c2ecf20Sopenharmony_ci } 4028c2ecf20Sopenharmony_ci } else { 4038c2ecf20Sopenharmony_ci memset(&peak, 0, sizeof(peak)); 4048c2ecf20Sopenharmony_ci } 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci if (max_size < psched_mtu(qdisc_dev(sch))) 4078c2ecf20Sopenharmony_ci pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", 4088c2ecf20Sopenharmony_ci max_size, qdisc_dev(sch)->name, 4098c2ecf20Sopenharmony_ci psched_mtu(qdisc_dev(sch))); 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci if (!max_size) { 4128c2ecf20Sopenharmony_ci err = -EINVAL; 4138c2ecf20Sopenharmony_ci goto done; 4148c2ecf20Sopenharmony_ci } 4158c2ecf20Sopenharmony_ci 4168c2ecf20Sopenharmony_ci if (q->qdisc != &noop_qdisc) { 4178c2ecf20Sopenharmony_ci err = fifo_set_limit(q->qdisc, qopt->limit); 4188c2ecf20Sopenharmony_ci if (err) 4198c2ecf20Sopenharmony_ci goto done; 4208c2ecf20Sopenharmony_ci } else if (qopt->limit > 0) { 4218c2ecf20Sopenharmony_ci child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit, 4228c2ecf20Sopenharmony_ci extack); 4238c2ecf20Sopenharmony_ci if (IS_ERR(child)) { 4248c2ecf20Sopenharmony_ci err = PTR_ERR(child); 4258c2ecf20Sopenharmony_ci goto done; 4268c2ecf20Sopenharmony_ci } 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ci /* child is fifo, no need to check for noop_qdisc */ 4298c2ecf20Sopenharmony_ci qdisc_hash_add(child, true); 4308c2ecf20Sopenharmony_ci } 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ci sch_tree_lock(sch); 4338c2ecf20Sopenharmony_ci if (child) { 4348c2ecf20Sopenharmony_ci qdisc_tree_flush_backlog(q->qdisc); 4358c2ecf20Sopenharmony_ci old = q->qdisc; 4368c2ecf20Sopenharmony_ci q->qdisc = child; 4378c2ecf20Sopenharmony_ci } 4388c2ecf20Sopenharmony_ci q->limit = qopt->limit; 4398c2ecf20Sopenharmony_ci if (tb[TCA_TBF_PBURST]) 4408c2ecf20Sopenharmony_ci q->mtu = mtu; 4418c2ecf20Sopenharmony_ci else 4428c2ecf20Sopenharmony_ci q->mtu = PSCHED_TICKS2NS(qopt->mtu); 4438c2ecf20Sopenharmony_ci q->max_size = max_size; 4448c2ecf20Sopenharmony_ci if (tb[TCA_TBF_BURST]) 4458c2ecf20Sopenharmony_ci q->buffer = buffer; 4468c2ecf20Sopenharmony_ci else 4478c2ecf20Sopenharmony_ci q->buffer = PSCHED_TICKS2NS(qopt->buffer); 4488c2ecf20Sopenharmony_ci q->tokens = q->buffer; 4498c2ecf20Sopenharmony_ci q->ptokens = q->mtu; 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); 4528c2ecf20Sopenharmony_ci memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci sch_tree_unlock(sch); 4558c2ecf20Sopenharmony_ci qdisc_put(old); 4568c2ecf20Sopenharmony_ci err = 0; 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ci tbf_offload_change(sch); 4598c2ecf20Sopenharmony_cidone: 4608c2ecf20Sopenharmony_ci return err; 4618c2ecf20Sopenharmony_ci} 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_cistatic int tbf_init(struct Qdisc *sch, struct nlattr *opt, 4648c2ecf20Sopenharmony_ci struct netlink_ext_ack *extack) 4658c2ecf20Sopenharmony_ci{ 4668c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 4678c2ecf20Sopenharmony_ci 4688c2ecf20Sopenharmony_ci qdisc_watchdog_init(&q->watchdog, sch); 4698c2ecf20Sopenharmony_ci q->qdisc = &noop_qdisc; 4708c2ecf20Sopenharmony_ci 4718c2ecf20Sopenharmony_ci if (!opt) 4728c2ecf20Sopenharmony_ci return -EINVAL; 4738c2ecf20Sopenharmony_ci 4748c2ecf20Sopenharmony_ci q->t_c = ktime_get_ns(); 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ci return tbf_change(sch, opt, extack); 4778c2ecf20Sopenharmony_ci} 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_cistatic void tbf_destroy(struct Qdisc *sch) 4808c2ecf20Sopenharmony_ci{ 4818c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci qdisc_watchdog_cancel(&q->watchdog); 4848c2ecf20Sopenharmony_ci tbf_offload_destroy(sch); 4858c2ecf20Sopenharmony_ci qdisc_put(q->qdisc); 4868c2ecf20Sopenharmony_ci} 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_cistatic int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) 4898c2ecf20Sopenharmony_ci{ 4908c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 4918c2ecf20Sopenharmony_ci struct nlattr *nest; 4928c2ecf20Sopenharmony_ci struct tc_tbf_qopt opt; 4938c2ecf20Sopenharmony_ci int err; 4948c2ecf20Sopenharmony_ci 4958c2ecf20Sopenharmony_ci err = tbf_offload_dump(sch); 4968c2ecf20Sopenharmony_ci if (err) 4978c2ecf20Sopenharmony_ci return err; 4988c2ecf20Sopenharmony_ci 4998c2ecf20Sopenharmony_ci nest = nla_nest_start_noflag(skb, TCA_OPTIONS); 5008c2ecf20Sopenharmony_ci if (nest == NULL) 5018c2ecf20Sopenharmony_ci goto nla_put_failure; 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci opt.limit = q->limit; 5048c2ecf20Sopenharmony_ci psched_ratecfg_getrate(&opt.rate, &q->rate); 5058c2ecf20Sopenharmony_ci if (tbf_peak_present(q)) 5068c2ecf20Sopenharmony_ci psched_ratecfg_getrate(&opt.peakrate, &q->peak); 5078c2ecf20Sopenharmony_ci else 5088c2ecf20Sopenharmony_ci memset(&opt.peakrate, 0, sizeof(opt.peakrate)); 5098c2ecf20Sopenharmony_ci opt.mtu = PSCHED_NS2TICKS(q->mtu); 5108c2ecf20Sopenharmony_ci opt.buffer = PSCHED_NS2TICKS(q->buffer); 5118c2ecf20Sopenharmony_ci if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt)) 5128c2ecf20Sopenharmony_ci goto nla_put_failure; 5138c2ecf20Sopenharmony_ci if (q->rate.rate_bytes_ps >= (1ULL << 32) && 5148c2ecf20Sopenharmony_ci nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps, 5158c2ecf20Sopenharmony_ci TCA_TBF_PAD)) 5168c2ecf20Sopenharmony_ci goto nla_put_failure; 5178c2ecf20Sopenharmony_ci if (tbf_peak_present(q) && 5188c2ecf20Sopenharmony_ci q->peak.rate_bytes_ps >= (1ULL << 32) && 5198c2ecf20Sopenharmony_ci nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps, 5208c2ecf20Sopenharmony_ci TCA_TBF_PAD)) 5218c2ecf20Sopenharmony_ci goto nla_put_failure; 5228c2ecf20Sopenharmony_ci 5238c2ecf20Sopenharmony_ci return nla_nest_end(skb, nest); 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_cinla_put_failure: 5268c2ecf20Sopenharmony_ci nla_nest_cancel(skb, nest); 5278c2ecf20Sopenharmony_ci return -1; 5288c2ecf20Sopenharmony_ci} 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_cistatic int tbf_dump_class(struct Qdisc *sch, unsigned long cl, 5318c2ecf20Sopenharmony_ci struct sk_buff *skb, struct tcmsg *tcm) 5328c2ecf20Sopenharmony_ci{ 5338c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci tcm->tcm_handle |= TC_H_MIN(1); 5368c2ecf20Sopenharmony_ci tcm->tcm_info = q->qdisc->handle; 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci return 0; 5398c2ecf20Sopenharmony_ci} 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_cistatic int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 5428c2ecf20Sopenharmony_ci struct Qdisc **old, struct netlink_ext_ack *extack) 5438c2ecf20Sopenharmony_ci{ 5448c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 5458c2ecf20Sopenharmony_ci 5468c2ecf20Sopenharmony_ci if (new == NULL) 5478c2ecf20Sopenharmony_ci new = &noop_qdisc; 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_ci *old = qdisc_replace(sch, new, &q->qdisc); 5508c2ecf20Sopenharmony_ci return 0; 5518c2ecf20Sopenharmony_ci} 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_cistatic struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg) 5548c2ecf20Sopenharmony_ci{ 5558c2ecf20Sopenharmony_ci struct tbf_sched_data *q = qdisc_priv(sch); 5568c2ecf20Sopenharmony_ci return q->qdisc; 5578c2ecf20Sopenharmony_ci} 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_cistatic unsigned long tbf_find(struct Qdisc *sch, u32 classid) 5608c2ecf20Sopenharmony_ci{ 5618c2ecf20Sopenharmony_ci return 1; 5628c2ecf20Sopenharmony_ci} 5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_cistatic void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) 5658c2ecf20Sopenharmony_ci{ 5668c2ecf20Sopenharmony_ci if (!walker->stop) { 5678c2ecf20Sopenharmony_ci if (walker->count >= walker->skip) 5688c2ecf20Sopenharmony_ci if (walker->fn(sch, 1, walker) < 0) { 5698c2ecf20Sopenharmony_ci walker->stop = 1; 5708c2ecf20Sopenharmony_ci return; 5718c2ecf20Sopenharmony_ci } 5728c2ecf20Sopenharmony_ci walker->count++; 5738c2ecf20Sopenharmony_ci } 5748c2ecf20Sopenharmony_ci} 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_cistatic const struct Qdisc_class_ops tbf_class_ops = { 5778c2ecf20Sopenharmony_ci .graft = tbf_graft, 5788c2ecf20Sopenharmony_ci .leaf = tbf_leaf, 5798c2ecf20Sopenharmony_ci .find = tbf_find, 5808c2ecf20Sopenharmony_ci .walk = tbf_walk, 5818c2ecf20Sopenharmony_ci .dump = tbf_dump_class, 5828c2ecf20Sopenharmony_ci}; 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_cistatic struct Qdisc_ops tbf_qdisc_ops __read_mostly = { 5858c2ecf20Sopenharmony_ci .next = NULL, 5868c2ecf20Sopenharmony_ci .cl_ops = &tbf_class_ops, 5878c2ecf20Sopenharmony_ci .id = "tbf", 5888c2ecf20Sopenharmony_ci .priv_size = sizeof(struct tbf_sched_data), 5898c2ecf20Sopenharmony_ci .enqueue = tbf_enqueue, 5908c2ecf20Sopenharmony_ci .dequeue = tbf_dequeue, 5918c2ecf20Sopenharmony_ci .peek = qdisc_peek_dequeued, 5928c2ecf20Sopenharmony_ci .init = tbf_init, 5938c2ecf20Sopenharmony_ci .reset = tbf_reset, 5948c2ecf20Sopenharmony_ci .destroy = tbf_destroy, 5958c2ecf20Sopenharmony_ci .change = tbf_change, 5968c2ecf20Sopenharmony_ci .dump = tbf_dump, 5978c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 5988c2ecf20Sopenharmony_ci}; 5998c2ecf20Sopenharmony_ci 6008c2ecf20Sopenharmony_cistatic int __init tbf_module_init(void) 6018c2ecf20Sopenharmony_ci{ 6028c2ecf20Sopenharmony_ci return register_qdisc(&tbf_qdisc_ops); 6038c2ecf20Sopenharmony_ci} 6048c2ecf20Sopenharmony_ci 6058c2ecf20Sopenharmony_cistatic void __exit tbf_module_exit(void) 6068c2ecf20Sopenharmony_ci{ 6078c2ecf20Sopenharmony_ci unregister_qdisc(&tbf_qdisc_ops); 6088c2ecf20Sopenharmony_ci} 6098c2ecf20Sopenharmony_cimodule_init(tbf_module_init) 6108c2ecf20Sopenharmony_cimodule_exit(tbf_module_exit) 6118c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 612