18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * net/sched/sch_sfq.c Stochastic Fairness Queueing discipline. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include <linux/module.h> 98c2ecf20Sopenharmony_ci#include <linux/types.h> 108c2ecf20Sopenharmony_ci#include <linux/kernel.h> 118c2ecf20Sopenharmony_ci#include <linux/jiffies.h> 128c2ecf20Sopenharmony_ci#include <linux/string.h> 138c2ecf20Sopenharmony_ci#include <linux/in.h> 148c2ecf20Sopenharmony_ci#include <linux/errno.h> 158c2ecf20Sopenharmony_ci#include <linux/init.h> 168c2ecf20Sopenharmony_ci#include <linux/skbuff.h> 178c2ecf20Sopenharmony_ci#include <linux/siphash.h> 188c2ecf20Sopenharmony_ci#include <linux/slab.h> 198c2ecf20Sopenharmony_ci#include <linux/vmalloc.h> 208c2ecf20Sopenharmony_ci#include <net/netlink.h> 218c2ecf20Sopenharmony_ci#include <net/pkt_sched.h> 228c2ecf20Sopenharmony_ci#include <net/pkt_cls.h> 238c2ecf20Sopenharmony_ci#include <net/red.h> 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci/* Stochastic Fairness Queuing algorithm. 278c2ecf20Sopenharmony_ci ======================================= 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci Source: 308c2ecf20Sopenharmony_ci Paul E. McKenney "Stochastic Fairness Queuing", 318c2ecf20Sopenharmony_ci IEEE INFOCOMM'90 Proceedings, San Francisco, 1990. 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_ci Paul E. McKenney "Stochastic Fairness Queuing", 348c2ecf20Sopenharmony_ci "Interworking: Research and Experience", v.2, 1991, p.113-131. 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci See also: 388c2ecf20Sopenharmony_ci M. Shreedhar and George Varghese "Efficient Fair 398c2ecf20Sopenharmony_ci Queuing using Deficit Round Robin", Proc. SIGCOMM 95. 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci This is not the thing that is usually called (W)FQ nowadays. 438c2ecf20Sopenharmony_ci It does not use any timestamp mechanism, but instead 448c2ecf20Sopenharmony_ci processes queues in round-robin order. 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci ADVANTAGE: 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci - It is very cheap. Both CPU and memory requirements are minimal. 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci DRAWBACKS: 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci - "Stochastic" -> It is not 100% fair. 538c2ecf20Sopenharmony_ci When hash collisions occur, several flows are considered as one. 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_ci - "Round-robin" -> It introduces larger delays than virtual clock 568c2ecf20Sopenharmony_ci based schemes, and should not be used for isolating interactive 578c2ecf20Sopenharmony_ci traffic from non-interactive. It means, that this scheduler 588c2ecf20Sopenharmony_ci should be used as leaf of CBQ or P3, which put interactive traffic 598c2ecf20Sopenharmony_ci to higher priority band. 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci We still need true WFQ for top level CSZ, but using WFQ 628c2ecf20Sopenharmony_ci for the best effort traffic is absolutely pointless: 638c2ecf20Sopenharmony_ci SFQ is superior for this purpose. 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci IMPLEMENTATION: 668c2ecf20Sopenharmony_ci This implementation limits : 678c2ecf20Sopenharmony_ci - maximal queue length per flow to 127 packets. 688c2ecf20Sopenharmony_ci - max mtu to 2^18-1; 698c2ecf20Sopenharmony_ci - max 65408 flows, 708c2ecf20Sopenharmony_ci - number of hash buckets to 65536. 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci It is easy to increase these values, but not in flight. */ 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci#define SFQ_MAX_DEPTH 127 /* max number of packets per flow */ 758c2ecf20Sopenharmony_ci#define SFQ_DEFAULT_FLOWS 128 768c2ecf20Sopenharmony_ci#define SFQ_MAX_FLOWS (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */ 778c2ecf20Sopenharmony_ci#define SFQ_EMPTY_SLOT 0xffff 788c2ecf20Sopenharmony_ci#define SFQ_DEFAULT_HASH_DIVISOR 1024 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci/* We use 16 bits to store allot, and want to handle packets up to 64K 818c2ecf20Sopenharmony_ci * Scale allot by 8 (1<<3) so that no overflow occurs. 828c2ecf20Sopenharmony_ci */ 838c2ecf20Sopenharmony_ci#define SFQ_ALLOT_SHIFT 3 848c2ecf20Sopenharmony_ci#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */ 878c2ecf20Sopenharmony_citypedef u16 sfq_index; 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci/* 908c2ecf20Sopenharmony_ci * We dont use pointers to save space. 918c2ecf20Sopenharmony_ci * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array 928c2ecf20Sopenharmony_ci * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH] 938c2ecf20Sopenharmony_ci * are 'pointers' to dep[] array 948c2ecf20Sopenharmony_ci */ 958c2ecf20Sopenharmony_cistruct sfq_head { 968c2ecf20Sopenharmony_ci sfq_index next; 978c2ecf20Sopenharmony_ci sfq_index prev; 988c2ecf20Sopenharmony_ci}; 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_cistruct sfq_slot { 1018c2ecf20Sopenharmony_ci struct sk_buff *skblist_next; 1028c2ecf20Sopenharmony_ci struct sk_buff *skblist_prev; 1038c2ecf20Sopenharmony_ci sfq_index qlen; /* number of skbs in skblist */ 1048c2ecf20Sopenharmony_ci sfq_index next; /* next slot in sfq RR chain */ 1058c2ecf20Sopenharmony_ci struct sfq_head dep; /* anchor in dep[] chains */ 1068c2ecf20Sopenharmony_ci unsigned short hash; /* hash value (index in ht[]) */ 1078c2ecf20Sopenharmony_ci short allot; /* credit for this slot */ 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci unsigned int backlog; 1108c2ecf20Sopenharmony_ci struct red_vars vars; 1118c2ecf20Sopenharmony_ci}; 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_cistruct sfq_sched_data { 1148c2ecf20Sopenharmony_ci/* frequently used fields */ 1158c2ecf20Sopenharmony_ci int limit; /* limit of total number of packets in this qdisc */ 1168c2ecf20Sopenharmony_ci unsigned int divisor; /* number of slots in hash table */ 1178c2ecf20Sopenharmony_ci u8 headdrop; 1188c2ecf20Sopenharmony_ci u8 maxdepth; /* limit of packets per flow */ 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci siphash_key_t perturbation; 1218c2ecf20Sopenharmony_ci u8 cur_depth; /* depth of longest slot */ 1228c2ecf20Sopenharmony_ci u8 flags; 1238c2ecf20Sopenharmony_ci unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ 1248c2ecf20Sopenharmony_ci struct tcf_proto __rcu *filter_list; 1258c2ecf20Sopenharmony_ci struct tcf_block *block; 1268c2ecf20Sopenharmony_ci sfq_index *ht; /* Hash table ('divisor' slots) */ 1278c2ecf20Sopenharmony_ci struct sfq_slot *slots; /* Flows table ('maxflows' entries) */ 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci struct red_parms *red_parms; 1308c2ecf20Sopenharmony_ci struct tc_sfqred_stats stats; 1318c2ecf20Sopenharmony_ci struct sfq_slot *tail; /* current slot in round */ 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci struct sfq_head dep[SFQ_MAX_DEPTH + 1]; 1348c2ecf20Sopenharmony_ci /* Linked lists of slots, indexed by depth 1358c2ecf20Sopenharmony_ci * dep[0] : list of unused flows 1368c2ecf20Sopenharmony_ci * dep[1] : list of flows with 1 packet 1378c2ecf20Sopenharmony_ci * dep[X] : list of flows with X packets 1388c2ecf20Sopenharmony_ci */ 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci unsigned int maxflows; /* number of flows in flows array */ 1418c2ecf20Sopenharmony_ci int perturb_period; 1428c2ecf20Sopenharmony_ci unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ 1438c2ecf20Sopenharmony_ci struct timer_list perturb_timer; 1448c2ecf20Sopenharmony_ci struct Qdisc *sch; 1458c2ecf20Sopenharmony_ci}; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci/* 1488c2ecf20Sopenharmony_ci * sfq_head are either in a sfq_slot or in dep[] array 1498c2ecf20Sopenharmony_ci */ 1508c2ecf20Sopenharmony_cistatic inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val) 1518c2ecf20Sopenharmony_ci{ 1528c2ecf20Sopenharmony_ci if (val < SFQ_MAX_FLOWS) 1538c2ecf20Sopenharmony_ci return &q->slots[val].dep; 1548c2ecf20Sopenharmony_ci return &q->dep[val - SFQ_MAX_FLOWS]; 1558c2ecf20Sopenharmony_ci} 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_cistatic unsigned int sfq_hash(const struct sfq_sched_data *q, 1588c2ecf20Sopenharmony_ci const struct sk_buff *skb) 1598c2ecf20Sopenharmony_ci{ 1608c2ecf20Sopenharmony_ci return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1); 1618c2ecf20Sopenharmony_ci} 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_cistatic unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, 1648c2ecf20Sopenharmony_ci int *qerr) 1658c2ecf20Sopenharmony_ci{ 1668c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 1678c2ecf20Sopenharmony_ci struct tcf_result res; 1688c2ecf20Sopenharmony_ci struct tcf_proto *fl; 1698c2ecf20Sopenharmony_ci int result; 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci if (TC_H_MAJ(skb->priority) == sch->handle && 1728c2ecf20Sopenharmony_ci TC_H_MIN(skb->priority) > 0 && 1738c2ecf20Sopenharmony_ci TC_H_MIN(skb->priority) <= q->divisor) 1748c2ecf20Sopenharmony_ci return TC_H_MIN(skb->priority); 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci fl = rcu_dereference_bh(q->filter_list); 1778c2ecf20Sopenharmony_ci if (!fl) 1788c2ecf20Sopenharmony_ci return sfq_hash(q, skb) + 1; 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 1818c2ecf20Sopenharmony_ci result = tcf_classify(skb, fl, &res, false); 1828c2ecf20Sopenharmony_ci if (result >= 0) { 1838c2ecf20Sopenharmony_ci#ifdef CONFIG_NET_CLS_ACT 1848c2ecf20Sopenharmony_ci switch (result) { 1858c2ecf20Sopenharmony_ci case TC_ACT_STOLEN: 1868c2ecf20Sopenharmony_ci case TC_ACT_QUEUED: 1878c2ecf20Sopenharmony_ci case TC_ACT_TRAP: 1888c2ecf20Sopenharmony_ci *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 1898c2ecf20Sopenharmony_ci fallthrough; 1908c2ecf20Sopenharmony_ci case TC_ACT_SHOT: 1918c2ecf20Sopenharmony_ci return 0; 1928c2ecf20Sopenharmony_ci } 1938c2ecf20Sopenharmony_ci#endif 1948c2ecf20Sopenharmony_ci if (TC_H_MIN(res.classid) <= q->divisor) 1958c2ecf20Sopenharmony_ci return TC_H_MIN(res.classid); 1968c2ecf20Sopenharmony_ci } 1978c2ecf20Sopenharmony_ci return 0; 1988c2ecf20Sopenharmony_ci} 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci/* 2018c2ecf20Sopenharmony_ci * x : slot number [0 .. SFQ_MAX_FLOWS - 1] 2028c2ecf20Sopenharmony_ci */ 2038c2ecf20Sopenharmony_cistatic inline void sfq_link(struct sfq_sched_data *q, sfq_index x) 2048c2ecf20Sopenharmony_ci{ 2058c2ecf20Sopenharmony_ci sfq_index p, n; 2068c2ecf20Sopenharmony_ci struct sfq_slot *slot = &q->slots[x]; 2078c2ecf20Sopenharmony_ci int qlen = slot->qlen; 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_ci p = qlen + SFQ_MAX_FLOWS; 2108c2ecf20Sopenharmony_ci n = q->dep[qlen].next; 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ci slot->dep.next = n; 2138c2ecf20Sopenharmony_ci slot->dep.prev = p; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */ 2168c2ecf20Sopenharmony_ci sfq_dep_head(q, n)->prev = x; 2178c2ecf20Sopenharmony_ci} 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci#define sfq_unlink(q, x, n, p) \ 2208c2ecf20Sopenharmony_ci do { \ 2218c2ecf20Sopenharmony_ci n = q->slots[x].dep.next; \ 2228c2ecf20Sopenharmony_ci p = q->slots[x].dep.prev; \ 2238c2ecf20Sopenharmony_ci sfq_dep_head(q, p)->next = n; \ 2248c2ecf20Sopenharmony_ci sfq_dep_head(q, n)->prev = p; \ 2258c2ecf20Sopenharmony_ci } while (0) 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_cistatic inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) 2298c2ecf20Sopenharmony_ci{ 2308c2ecf20Sopenharmony_ci sfq_index p, n; 2318c2ecf20Sopenharmony_ci int d; 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci sfq_unlink(q, x, n, p); 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_ci d = q->slots[x].qlen--; 2368c2ecf20Sopenharmony_ci if (n == p && q->cur_depth == d) 2378c2ecf20Sopenharmony_ci q->cur_depth--; 2388c2ecf20Sopenharmony_ci sfq_link(q, x); 2398c2ecf20Sopenharmony_ci} 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_cistatic inline void sfq_inc(struct sfq_sched_data *q, sfq_index x) 2428c2ecf20Sopenharmony_ci{ 2438c2ecf20Sopenharmony_ci sfq_index p, n; 2448c2ecf20Sopenharmony_ci int d; 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci sfq_unlink(q, x, n, p); 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci d = ++q->slots[x].qlen; 2498c2ecf20Sopenharmony_ci if (q->cur_depth < d) 2508c2ecf20Sopenharmony_ci q->cur_depth = d; 2518c2ecf20Sopenharmony_ci sfq_link(q, x); 2528c2ecf20Sopenharmony_ci} 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_ci/* helper functions : might be changed when/if skb use a standard list_head */ 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci/* remove one skb from tail of slot queue */ 2578c2ecf20Sopenharmony_cistatic inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot) 2588c2ecf20Sopenharmony_ci{ 2598c2ecf20Sopenharmony_ci struct sk_buff *skb = slot->skblist_prev; 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci slot->skblist_prev = skb->prev; 2628c2ecf20Sopenharmony_ci skb->prev->next = (struct sk_buff *)slot; 2638c2ecf20Sopenharmony_ci skb->next = skb->prev = NULL; 2648c2ecf20Sopenharmony_ci return skb; 2658c2ecf20Sopenharmony_ci} 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci/* remove one skb from head of slot queue */ 2688c2ecf20Sopenharmony_cistatic inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot) 2698c2ecf20Sopenharmony_ci{ 2708c2ecf20Sopenharmony_ci struct sk_buff *skb = slot->skblist_next; 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci slot->skblist_next = skb->next; 2738c2ecf20Sopenharmony_ci skb->next->prev = (struct sk_buff *)slot; 2748c2ecf20Sopenharmony_ci skb->next = skb->prev = NULL; 2758c2ecf20Sopenharmony_ci return skb; 2768c2ecf20Sopenharmony_ci} 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_cistatic inline void slot_queue_init(struct sfq_slot *slot) 2798c2ecf20Sopenharmony_ci{ 2808c2ecf20Sopenharmony_ci memset(slot, 0, sizeof(*slot)); 2818c2ecf20Sopenharmony_ci slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot; 2828c2ecf20Sopenharmony_ci} 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci/* add skb to slot queue (tail add) */ 2858c2ecf20Sopenharmony_cistatic inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb) 2868c2ecf20Sopenharmony_ci{ 2878c2ecf20Sopenharmony_ci skb->prev = slot->skblist_prev; 2888c2ecf20Sopenharmony_ci skb->next = (struct sk_buff *)slot; 2898c2ecf20Sopenharmony_ci slot->skblist_prev->next = skb; 2908c2ecf20Sopenharmony_ci slot->skblist_prev = skb; 2918c2ecf20Sopenharmony_ci} 2928c2ecf20Sopenharmony_ci 2938c2ecf20Sopenharmony_cistatic unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) 2948c2ecf20Sopenharmony_ci{ 2958c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 2968c2ecf20Sopenharmony_ci sfq_index x, d = q->cur_depth; 2978c2ecf20Sopenharmony_ci struct sk_buff *skb; 2988c2ecf20Sopenharmony_ci unsigned int len; 2998c2ecf20Sopenharmony_ci struct sfq_slot *slot; 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_ci /* Queue is full! Find the longest slot and drop tail packet from it */ 3028c2ecf20Sopenharmony_ci if (d > 1) { 3038c2ecf20Sopenharmony_ci x = q->dep[d].next; 3048c2ecf20Sopenharmony_ci slot = &q->slots[x]; 3058c2ecf20Sopenharmony_cidrop: 3068c2ecf20Sopenharmony_ci skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot); 3078c2ecf20Sopenharmony_ci len = qdisc_pkt_len(skb); 3088c2ecf20Sopenharmony_ci slot->backlog -= len; 3098c2ecf20Sopenharmony_ci sfq_dec(q, x); 3108c2ecf20Sopenharmony_ci sch->q.qlen--; 3118c2ecf20Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 3128c2ecf20Sopenharmony_ci qdisc_drop(skb, sch, to_free); 3138c2ecf20Sopenharmony_ci return len; 3148c2ecf20Sopenharmony_ci } 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ci if (d == 1) { 3178c2ecf20Sopenharmony_ci /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ 3188c2ecf20Sopenharmony_ci x = q->tail->next; 3198c2ecf20Sopenharmony_ci slot = &q->slots[x]; 3208c2ecf20Sopenharmony_ci q->tail->next = slot->next; 3218c2ecf20Sopenharmony_ci q->ht[slot->hash] = SFQ_EMPTY_SLOT; 3228c2ecf20Sopenharmony_ci goto drop; 3238c2ecf20Sopenharmony_ci } 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_ci return 0; 3268c2ecf20Sopenharmony_ci} 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci/* Is ECN parameter configured */ 3298c2ecf20Sopenharmony_cistatic int sfq_prob_mark(const struct sfq_sched_data *q) 3308c2ecf20Sopenharmony_ci{ 3318c2ecf20Sopenharmony_ci return q->flags & TC_RED_ECN; 3328c2ecf20Sopenharmony_ci} 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci/* Should packets over max threshold just be marked */ 3358c2ecf20Sopenharmony_cistatic int sfq_hard_mark(const struct sfq_sched_data *q) 3368c2ecf20Sopenharmony_ci{ 3378c2ecf20Sopenharmony_ci return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN; 3388c2ecf20Sopenharmony_ci} 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_cistatic int sfq_headdrop(const struct sfq_sched_data *q) 3418c2ecf20Sopenharmony_ci{ 3428c2ecf20Sopenharmony_ci return q->headdrop; 3438c2ecf20Sopenharmony_ci} 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_cistatic int 3468c2ecf20Sopenharmony_cisfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) 3478c2ecf20Sopenharmony_ci{ 3488c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 3498c2ecf20Sopenharmony_ci unsigned int hash, dropped; 3508c2ecf20Sopenharmony_ci sfq_index x, qlen; 3518c2ecf20Sopenharmony_ci struct sfq_slot *slot; 3528c2ecf20Sopenharmony_ci int ret; 3538c2ecf20Sopenharmony_ci struct sk_buff *head; 3548c2ecf20Sopenharmony_ci int delta; 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci hash = sfq_classify(skb, sch, &ret); 3578c2ecf20Sopenharmony_ci if (hash == 0) { 3588c2ecf20Sopenharmony_ci if (ret & __NET_XMIT_BYPASS) 3598c2ecf20Sopenharmony_ci qdisc_qstats_drop(sch); 3608c2ecf20Sopenharmony_ci __qdisc_drop(skb, to_free); 3618c2ecf20Sopenharmony_ci return ret; 3628c2ecf20Sopenharmony_ci } 3638c2ecf20Sopenharmony_ci hash--; 3648c2ecf20Sopenharmony_ci 3658c2ecf20Sopenharmony_ci x = q->ht[hash]; 3668c2ecf20Sopenharmony_ci slot = &q->slots[x]; 3678c2ecf20Sopenharmony_ci if (x == SFQ_EMPTY_SLOT) { 3688c2ecf20Sopenharmony_ci x = q->dep[0].next; /* get a free slot */ 3698c2ecf20Sopenharmony_ci if (x >= SFQ_MAX_FLOWS) 3708c2ecf20Sopenharmony_ci return qdisc_drop(skb, sch, to_free); 3718c2ecf20Sopenharmony_ci q->ht[hash] = x; 3728c2ecf20Sopenharmony_ci slot = &q->slots[x]; 3738c2ecf20Sopenharmony_ci slot->hash = hash; 3748c2ecf20Sopenharmony_ci slot->backlog = 0; /* should already be 0 anyway... */ 3758c2ecf20Sopenharmony_ci red_set_vars(&slot->vars); 3768c2ecf20Sopenharmony_ci goto enqueue; 3778c2ecf20Sopenharmony_ci } 3788c2ecf20Sopenharmony_ci if (q->red_parms) { 3798c2ecf20Sopenharmony_ci slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms, 3808c2ecf20Sopenharmony_ci &slot->vars, 3818c2ecf20Sopenharmony_ci slot->backlog); 3828c2ecf20Sopenharmony_ci switch (red_action(q->red_parms, 3838c2ecf20Sopenharmony_ci &slot->vars, 3848c2ecf20Sopenharmony_ci slot->vars.qavg)) { 3858c2ecf20Sopenharmony_ci case RED_DONT_MARK: 3868c2ecf20Sopenharmony_ci break; 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_ci case RED_PROB_MARK: 3898c2ecf20Sopenharmony_ci qdisc_qstats_overlimit(sch); 3908c2ecf20Sopenharmony_ci if (sfq_prob_mark(q)) { 3918c2ecf20Sopenharmony_ci /* We know we have at least one packet in queue */ 3928c2ecf20Sopenharmony_ci if (sfq_headdrop(q) && 3938c2ecf20Sopenharmony_ci INET_ECN_set_ce(slot->skblist_next)) { 3948c2ecf20Sopenharmony_ci q->stats.prob_mark_head++; 3958c2ecf20Sopenharmony_ci break; 3968c2ecf20Sopenharmony_ci } 3978c2ecf20Sopenharmony_ci if (INET_ECN_set_ce(skb)) { 3988c2ecf20Sopenharmony_ci q->stats.prob_mark++; 3998c2ecf20Sopenharmony_ci break; 4008c2ecf20Sopenharmony_ci } 4018c2ecf20Sopenharmony_ci } 4028c2ecf20Sopenharmony_ci q->stats.prob_drop++; 4038c2ecf20Sopenharmony_ci goto congestion_drop; 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci case RED_HARD_MARK: 4068c2ecf20Sopenharmony_ci qdisc_qstats_overlimit(sch); 4078c2ecf20Sopenharmony_ci if (sfq_hard_mark(q)) { 4088c2ecf20Sopenharmony_ci /* We know we have at least one packet in queue */ 4098c2ecf20Sopenharmony_ci if (sfq_headdrop(q) && 4108c2ecf20Sopenharmony_ci INET_ECN_set_ce(slot->skblist_next)) { 4118c2ecf20Sopenharmony_ci q->stats.forced_mark_head++; 4128c2ecf20Sopenharmony_ci break; 4138c2ecf20Sopenharmony_ci } 4148c2ecf20Sopenharmony_ci if (INET_ECN_set_ce(skb)) { 4158c2ecf20Sopenharmony_ci q->stats.forced_mark++; 4168c2ecf20Sopenharmony_ci break; 4178c2ecf20Sopenharmony_ci } 4188c2ecf20Sopenharmony_ci } 4198c2ecf20Sopenharmony_ci q->stats.forced_drop++; 4208c2ecf20Sopenharmony_ci goto congestion_drop; 4218c2ecf20Sopenharmony_ci } 4228c2ecf20Sopenharmony_ci } 4238c2ecf20Sopenharmony_ci 4248c2ecf20Sopenharmony_ci if (slot->qlen >= q->maxdepth) { 4258c2ecf20Sopenharmony_cicongestion_drop: 4268c2ecf20Sopenharmony_ci if (!sfq_headdrop(q)) 4278c2ecf20Sopenharmony_ci return qdisc_drop(skb, sch, to_free); 4288c2ecf20Sopenharmony_ci 4298c2ecf20Sopenharmony_ci /* We know we have at least one packet in queue */ 4308c2ecf20Sopenharmony_ci head = slot_dequeue_head(slot); 4318c2ecf20Sopenharmony_ci delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); 4328c2ecf20Sopenharmony_ci sch->qstats.backlog -= delta; 4338c2ecf20Sopenharmony_ci slot->backlog -= delta; 4348c2ecf20Sopenharmony_ci qdisc_drop(head, sch, to_free); 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci slot_queue_add(slot, skb); 4378c2ecf20Sopenharmony_ci qdisc_tree_reduce_backlog(sch, 0, delta); 4388c2ecf20Sopenharmony_ci return NET_XMIT_CN; 4398c2ecf20Sopenharmony_ci } 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_cienqueue: 4428c2ecf20Sopenharmony_ci qdisc_qstats_backlog_inc(sch, skb); 4438c2ecf20Sopenharmony_ci slot->backlog += qdisc_pkt_len(skb); 4448c2ecf20Sopenharmony_ci slot_queue_add(slot, skb); 4458c2ecf20Sopenharmony_ci sfq_inc(q, x); 4468c2ecf20Sopenharmony_ci if (slot->qlen == 1) { /* The flow is new */ 4478c2ecf20Sopenharmony_ci if (q->tail == NULL) { /* It is the first flow */ 4488c2ecf20Sopenharmony_ci slot->next = x; 4498c2ecf20Sopenharmony_ci } else { 4508c2ecf20Sopenharmony_ci slot->next = q->tail->next; 4518c2ecf20Sopenharmony_ci q->tail->next = x; 4528c2ecf20Sopenharmony_ci } 4538c2ecf20Sopenharmony_ci /* We put this flow at the end of our flow list. 4548c2ecf20Sopenharmony_ci * This might sound unfair for a new flow to wait after old ones, 4558c2ecf20Sopenharmony_ci * but we could endup servicing new flows only, and freeze old ones. 4568c2ecf20Sopenharmony_ci */ 4578c2ecf20Sopenharmony_ci q->tail = slot; 4588c2ecf20Sopenharmony_ci /* We could use a bigger initial quantum for new flows */ 4598c2ecf20Sopenharmony_ci slot->allot = q->scaled_quantum; 4608c2ecf20Sopenharmony_ci } 4618c2ecf20Sopenharmony_ci if (++sch->q.qlen <= q->limit) 4628c2ecf20Sopenharmony_ci return NET_XMIT_SUCCESS; 4638c2ecf20Sopenharmony_ci 4648c2ecf20Sopenharmony_ci qlen = slot->qlen; 4658c2ecf20Sopenharmony_ci dropped = sfq_drop(sch, to_free); 4668c2ecf20Sopenharmony_ci /* Return Congestion Notification only if we dropped a packet 4678c2ecf20Sopenharmony_ci * from this flow. 4688c2ecf20Sopenharmony_ci */ 4698c2ecf20Sopenharmony_ci if (qlen != slot->qlen) { 4708c2ecf20Sopenharmony_ci qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb)); 4718c2ecf20Sopenharmony_ci return NET_XMIT_CN; 4728c2ecf20Sopenharmony_ci } 4738c2ecf20Sopenharmony_ci 4748c2ecf20Sopenharmony_ci /* As we dropped a packet, better let upper stack know this */ 4758c2ecf20Sopenharmony_ci qdisc_tree_reduce_backlog(sch, 1, dropped); 4768c2ecf20Sopenharmony_ci return NET_XMIT_SUCCESS; 4778c2ecf20Sopenharmony_ci} 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_cistatic struct sk_buff * 4808c2ecf20Sopenharmony_cisfq_dequeue(struct Qdisc *sch) 4818c2ecf20Sopenharmony_ci{ 4828c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 4838c2ecf20Sopenharmony_ci struct sk_buff *skb; 4848c2ecf20Sopenharmony_ci sfq_index a, next_a; 4858c2ecf20Sopenharmony_ci struct sfq_slot *slot; 4868c2ecf20Sopenharmony_ci 4878c2ecf20Sopenharmony_ci /* No active slots */ 4888c2ecf20Sopenharmony_ci if (q->tail == NULL) 4898c2ecf20Sopenharmony_ci return NULL; 4908c2ecf20Sopenharmony_ci 4918c2ecf20Sopenharmony_cinext_slot: 4928c2ecf20Sopenharmony_ci a = q->tail->next; 4938c2ecf20Sopenharmony_ci slot = &q->slots[a]; 4948c2ecf20Sopenharmony_ci if (slot->allot <= 0) { 4958c2ecf20Sopenharmony_ci q->tail = slot; 4968c2ecf20Sopenharmony_ci slot->allot += q->scaled_quantum; 4978c2ecf20Sopenharmony_ci goto next_slot; 4988c2ecf20Sopenharmony_ci } 4998c2ecf20Sopenharmony_ci skb = slot_dequeue_head(slot); 5008c2ecf20Sopenharmony_ci sfq_dec(q, a); 5018c2ecf20Sopenharmony_ci qdisc_bstats_update(sch, skb); 5028c2ecf20Sopenharmony_ci sch->q.qlen--; 5038c2ecf20Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 5048c2ecf20Sopenharmony_ci slot->backlog -= qdisc_pkt_len(skb); 5058c2ecf20Sopenharmony_ci /* Is the slot empty? */ 5068c2ecf20Sopenharmony_ci if (slot->qlen == 0) { 5078c2ecf20Sopenharmony_ci q->ht[slot->hash] = SFQ_EMPTY_SLOT; 5088c2ecf20Sopenharmony_ci next_a = slot->next; 5098c2ecf20Sopenharmony_ci if (a == next_a) { 5108c2ecf20Sopenharmony_ci q->tail = NULL; /* no more active slots */ 5118c2ecf20Sopenharmony_ci return skb; 5128c2ecf20Sopenharmony_ci } 5138c2ecf20Sopenharmony_ci q->tail->next = next_a; 5148c2ecf20Sopenharmony_ci } else { 5158c2ecf20Sopenharmony_ci slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb)); 5168c2ecf20Sopenharmony_ci } 5178c2ecf20Sopenharmony_ci return skb; 5188c2ecf20Sopenharmony_ci} 5198c2ecf20Sopenharmony_ci 5208c2ecf20Sopenharmony_cistatic void 5218c2ecf20Sopenharmony_cisfq_reset(struct Qdisc *sch) 5228c2ecf20Sopenharmony_ci{ 5238c2ecf20Sopenharmony_ci struct sk_buff *skb; 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci while ((skb = sfq_dequeue(sch)) != NULL) 5268c2ecf20Sopenharmony_ci rtnl_kfree_skbs(skb, skb); 5278c2ecf20Sopenharmony_ci} 5288c2ecf20Sopenharmony_ci 5298c2ecf20Sopenharmony_ci/* 5308c2ecf20Sopenharmony_ci * When q->perturbation is changed, we rehash all queued skbs 5318c2ecf20Sopenharmony_ci * to avoid OOO (Out Of Order) effects. 5328c2ecf20Sopenharmony_ci * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change 5338c2ecf20Sopenharmony_ci * counters. 5348c2ecf20Sopenharmony_ci */ 5358c2ecf20Sopenharmony_cistatic void sfq_rehash(struct Qdisc *sch) 5368c2ecf20Sopenharmony_ci{ 5378c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 5388c2ecf20Sopenharmony_ci struct sk_buff *skb; 5398c2ecf20Sopenharmony_ci int i; 5408c2ecf20Sopenharmony_ci struct sfq_slot *slot; 5418c2ecf20Sopenharmony_ci struct sk_buff_head list; 5428c2ecf20Sopenharmony_ci int dropped = 0; 5438c2ecf20Sopenharmony_ci unsigned int drop_len = 0; 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci __skb_queue_head_init(&list); 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci for (i = 0; i < q->maxflows; i++) { 5488c2ecf20Sopenharmony_ci slot = &q->slots[i]; 5498c2ecf20Sopenharmony_ci if (!slot->qlen) 5508c2ecf20Sopenharmony_ci continue; 5518c2ecf20Sopenharmony_ci while (slot->qlen) { 5528c2ecf20Sopenharmony_ci skb = slot_dequeue_head(slot); 5538c2ecf20Sopenharmony_ci sfq_dec(q, i); 5548c2ecf20Sopenharmony_ci __skb_queue_tail(&list, skb); 5558c2ecf20Sopenharmony_ci } 5568c2ecf20Sopenharmony_ci slot->backlog = 0; 5578c2ecf20Sopenharmony_ci red_set_vars(&slot->vars); 5588c2ecf20Sopenharmony_ci q->ht[slot->hash] = SFQ_EMPTY_SLOT; 5598c2ecf20Sopenharmony_ci } 5608c2ecf20Sopenharmony_ci q->tail = NULL; 5618c2ecf20Sopenharmony_ci 5628c2ecf20Sopenharmony_ci while ((skb = __skb_dequeue(&list)) != NULL) { 5638c2ecf20Sopenharmony_ci unsigned int hash = sfq_hash(q, skb); 5648c2ecf20Sopenharmony_ci sfq_index x = q->ht[hash]; 5658c2ecf20Sopenharmony_ci 5668c2ecf20Sopenharmony_ci slot = &q->slots[x]; 5678c2ecf20Sopenharmony_ci if (x == SFQ_EMPTY_SLOT) { 5688c2ecf20Sopenharmony_ci x = q->dep[0].next; /* get a free slot */ 5698c2ecf20Sopenharmony_ci if (x >= SFQ_MAX_FLOWS) { 5708c2ecf20Sopenharmony_cidrop: 5718c2ecf20Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 5728c2ecf20Sopenharmony_ci drop_len += qdisc_pkt_len(skb); 5738c2ecf20Sopenharmony_ci kfree_skb(skb); 5748c2ecf20Sopenharmony_ci dropped++; 5758c2ecf20Sopenharmony_ci continue; 5768c2ecf20Sopenharmony_ci } 5778c2ecf20Sopenharmony_ci q->ht[hash] = x; 5788c2ecf20Sopenharmony_ci slot = &q->slots[x]; 5798c2ecf20Sopenharmony_ci slot->hash = hash; 5808c2ecf20Sopenharmony_ci } 5818c2ecf20Sopenharmony_ci if (slot->qlen >= q->maxdepth) 5828c2ecf20Sopenharmony_ci goto drop; 5838c2ecf20Sopenharmony_ci slot_queue_add(slot, skb); 5848c2ecf20Sopenharmony_ci if (q->red_parms) 5858c2ecf20Sopenharmony_ci slot->vars.qavg = red_calc_qavg(q->red_parms, 5868c2ecf20Sopenharmony_ci &slot->vars, 5878c2ecf20Sopenharmony_ci slot->backlog); 5888c2ecf20Sopenharmony_ci slot->backlog += qdisc_pkt_len(skb); 5898c2ecf20Sopenharmony_ci sfq_inc(q, x); 5908c2ecf20Sopenharmony_ci if (slot->qlen == 1) { /* The flow is new */ 5918c2ecf20Sopenharmony_ci if (q->tail == NULL) { /* It is the first flow */ 5928c2ecf20Sopenharmony_ci slot->next = x; 5938c2ecf20Sopenharmony_ci } else { 5948c2ecf20Sopenharmony_ci slot->next = q->tail->next; 5958c2ecf20Sopenharmony_ci q->tail->next = x; 5968c2ecf20Sopenharmony_ci } 5978c2ecf20Sopenharmony_ci q->tail = slot; 5988c2ecf20Sopenharmony_ci slot->allot = q->scaled_quantum; 5998c2ecf20Sopenharmony_ci } 6008c2ecf20Sopenharmony_ci } 6018c2ecf20Sopenharmony_ci sch->q.qlen -= dropped; 6028c2ecf20Sopenharmony_ci qdisc_tree_reduce_backlog(sch, dropped, drop_len); 6038c2ecf20Sopenharmony_ci} 6048c2ecf20Sopenharmony_ci 6058c2ecf20Sopenharmony_cistatic void sfq_perturbation(struct timer_list *t) 6068c2ecf20Sopenharmony_ci{ 6078c2ecf20Sopenharmony_ci struct sfq_sched_data *q = from_timer(q, t, perturb_timer); 6088c2ecf20Sopenharmony_ci struct Qdisc *sch = q->sch; 6098c2ecf20Sopenharmony_ci spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 6108c2ecf20Sopenharmony_ci siphash_key_t nkey; 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_ci get_random_bytes(&nkey, sizeof(nkey)); 6138c2ecf20Sopenharmony_ci spin_lock(root_lock); 6148c2ecf20Sopenharmony_ci q->perturbation = nkey; 6158c2ecf20Sopenharmony_ci if (!q->filter_list && q->tail) 6168c2ecf20Sopenharmony_ci sfq_rehash(sch); 6178c2ecf20Sopenharmony_ci spin_unlock(root_lock); 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci if (q->perturb_period) 6208c2ecf20Sopenharmony_ci mod_timer(&q->perturb_timer, jiffies + q->perturb_period); 6218c2ecf20Sopenharmony_ci} 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_cistatic int sfq_change(struct Qdisc *sch, struct nlattr *opt) 6248c2ecf20Sopenharmony_ci{ 6258c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 6268c2ecf20Sopenharmony_ci struct tc_sfq_qopt *ctl = nla_data(opt); 6278c2ecf20Sopenharmony_ci struct tc_sfq_qopt_v1 *ctl_v1 = NULL; 6288c2ecf20Sopenharmony_ci unsigned int qlen, dropped = 0; 6298c2ecf20Sopenharmony_ci struct red_parms *p = NULL; 6308c2ecf20Sopenharmony_ci struct sk_buff *to_free = NULL; 6318c2ecf20Sopenharmony_ci struct sk_buff *tail = NULL; 6328c2ecf20Sopenharmony_ci 6338c2ecf20Sopenharmony_ci if (opt->nla_len < nla_attr_size(sizeof(*ctl))) 6348c2ecf20Sopenharmony_ci return -EINVAL; 6358c2ecf20Sopenharmony_ci if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1))) 6368c2ecf20Sopenharmony_ci ctl_v1 = nla_data(opt); 6378c2ecf20Sopenharmony_ci if (ctl->divisor && 6388c2ecf20Sopenharmony_ci (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) 6398c2ecf20Sopenharmony_ci return -EINVAL; 6408c2ecf20Sopenharmony_ci 6418c2ecf20Sopenharmony_ci /* slot->allot is a short, make sure quantum is not too big. */ 6428c2ecf20Sopenharmony_ci if (ctl->quantum) { 6438c2ecf20Sopenharmony_ci unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum); 6448c2ecf20Sopenharmony_ci 6458c2ecf20Sopenharmony_ci if (scaled <= 0 || scaled > SHRT_MAX) 6468c2ecf20Sopenharmony_ci return -EINVAL; 6478c2ecf20Sopenharmony_ci } 6488c2ecf20Sopenharmony_ci 6498c2ecf20Sopenharmony_ci if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, 6508c2ecf20Sopenharmony_ci ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) 6518c2ecf20Sopenharmony_ci return -EINVAL; 6528c2ecf20Sopenharmony_ci if (ctl_v1 && ctl_v1->qth_min) { 6538c2ecf20Sopenharmony_ci p = kmalloc(sizeof(*p), GFP_KERNEL); 6548c2ecf20Sopenharmony_ci if (!p) 6558c2ecf20Sopenharmony_ci return -ENOMEM; 6568c2ecf20Sopenharmony_ci } 6578c2ecf20Sopenharmony_ci sch_tree_lock(sch); 6588c2ecf20Sopenharmony_ci if (ctl->quantum) { 6598c2ecf20Sopenharmony_ci q->quantum = ctl->quantum; 6608c2ecf20Sopenharmony_ci q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); 6618c2ecf20Sopenharmony_ci } 6628c2ecf20Sopenharmony_ci q->perturb_period = ctl->perturb_period * HZ; 6638c2ecf20Sopenharmony_ci if (ctl->flows) 6648c2ecf20Sopenharmony_ci q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); 6658c2ecf20Sopenharmony_ci if (ctl->divisor) { 6668c2ecf20Sopenharmony_ci q->divisor = ctl->divisor; 6678c2ecf20Sopenharmony_ci q->maxflows = min_t(u32, q->maxflows, q->divisor); 6688c2ecf20Sopenharmony_ci } 6698c2ecf20Sopenharmony_ci if (ctl_v1) { 6708c2ecf20Sopenharmony_ci if (ctl_v1->depth) 6718c2ecf20Sopenharmony_ci q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); 6728c2ecf20Sopenharmony_ci if (p) { 6738c2ecf20Sopenharmony_ci swap(q->red_parms, p); 6748c2ecf20Sopenharmony_ci red_set_parms(q->red_parms, 6758c2ecf20Sopenharmony_ci ctl_v1->qth_min, ctl_v1->qth_max, 6768c2ecf20Sopenharmony_ci ctl_v1->Wlog, 6778c2ecf20Sopenharmony_ci ctl_v1->Plog, ctl_v1->Scell_log, 6788c2ecf20Sopenharmony_ci NULL, 6798c2ecf20Sopenharmony_ci ctl_v1->max_P); 6808c2ecf20Sopenharmony_ci } 6818c2ecf20Sopenharmony_ci q->flags = ctl_v1->flags; 6828c2ecf20Sopenharmony_ci q->headdrop = ctl_v1->headdrop; 6838c2ecf20Sopenharmony_ci } 6848c2ecf20Sopenharmony_ci if (ctl->limit) { 6858c2ecf20Sopenharmony_ci q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows); 6868c2ecf20Sopenharmony_ci q->maxflows = min_t(u32, q->maxflows, q->limit); 6878c2ecf20Sopenharmony_ci } 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci qlen = sch->q.qlen; 6908c2ecf20Sopenharmony_ci while (sch->q.qlen > q->limit) { 6918c2ecf20Sopenharmony_ci dropped += sfq_drop(sch, &to_free); 6928c2ecf20Sopenharmony_ci if (!tail) 6938c2ecf20Sopenharmony_ci tail = to_free; 6948c2ecf20Sopenharmony_ci } 6958c2ecf20Sopenharmony_ci 6968c2ecf20Sopenharmony_ci rtnl_kfree_skbs(to_free, tail); 6978c2ecf20Sopenharmony_ci qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); 6988c2ecf20Sopenharmony_ci 6998c2ecf20Sopenharmony_ci del_timer(&q->perturb_timer); 7008c2ecf20Sopenharmony_ci if (q->perturb_period) { 7018c2ecf20Sopenharmony_ci mod_timer(&q->perturb_timer, jiffies + q->perturb_period); 7028c2ecf20Sopenharmony_ci get_random_bytes(&q->perturbation, sizeof(q->perturbation)); 7038c2ecf20Sopenharmony_ci } 7048c2ecf20Sopenharmony_ci sch_tree_unlock(sch); 7058c2ecf20Sopenharmony_ci kfree(p); 7068c2ecf20Sopenharmony_ci return 0; 7078c2ecf20Sopenharmony_ci} 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_cistatic void *sfq_alloc(size_t sz) 7108c2ecf20Sopenharmony_ci{ 7118c2ecf20Sopenharmony_ci return kvmalloc(sz, GFP_KERNEL); 7128c2ecf20Sopenharmony_ci} 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_cistatic void sfq_free(void *addr) 7158c2ecf20Sopenharmony_ci{ 7168c2ecf20Sopenharmony_ci kvfree(addr); 7178c2ecf20Sopenharmony_ci} 7188c2ecf20Sopenharmony_ci 7198c2ecf20Sopenharmony_cistatic void sfq_destroy(struct Qdisc *sch) 7208c2ecf20Sopenharmony_ci{ 7218c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 7228c2ecf20Sopenharmony_ci 7238c2ecf20Sopenharmony_ci tcf_block_put(q->block); 7248c2ecf20Sopenharmony_ci q->perturb_period = 0; 7258c2ecf20Sopenharmony_ci del_timer_sync(&q->perturb_timer); 7268c2ecf20Sopenharmony_ci sfq_free(q->ht); 7278c2ecf20Sopenharmony_ci sfq_free(q->slots); 7288c2ecf20Sopenharmony_ci kfree(q->red_parms); 7298c2ecf20Sopenharmony_ci} 7308c2ecf20Sopenharmony_ci 7318c2ecf20Sopenharmony_cistatic int sfq_init(struct Qdisc *sch, struct nlattr *opt, 7328c2ecf20Sopenharmony_ci struct netlink_ext_ack *extack) 7338c2ecf20Sopenharmony_ci{ 7348c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 7358c2ecf20Sopenharmony_ci int i; 7368c2ecf20Sopenharmony_ci int err; 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci q->sch = sch; 7398c2ecf20Sopenharmony_ci timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); 7408c2ecf20Sopenharmony_ci 7418c2ecf20Sopenharmony_ci err = tcf_block_get(&q->block, &q->filter_list, sch, extack); 7428c2ecf20Sopenharmony_ci if (err) 7438c2ecf20Sopenharmony_ci return err; 7448c2ecf20Sopenharmony_ci 7458c2ecf20Sopenharmony_ci for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) { 7468c2ecf20Sopenharmony_ci q->dep[i].next = i + SFQ_MAX_FLOWS; 7478c2ecf20Sopenharmony_ci q->dep[i].prev = i + SFQ_MAX_FLOWS; 7488c2ecf20Sopenharmony_ci } 7498c2ecf20Sopenharmony_ci 7508c2ecf20Sopenharmony_ci q->limit = SFQ_MAX_DEPTH; 7518c2ecf20Sopenharmony_ci q->maxdepth = SFQ_MAX_DEPTH; 7528c2ecf20Sopenharmony_ci q->cur_depth = 0; 7538c2ecf20Sopenharmony_ci q->tail = NULL; 7548c2ecf20Sopenharmony_ci q->divisor = SFQ_DEFAULT_HASH_DIVISOR; 7558c2ecf20Sopenharmony_ci q->maxflows = SFQ_DEFAULT_FLOWS; 7568c2ecf20Sopenharmony_ci q->quantum = psched_mtu(qdisc_dev(sch)); 7578c2ecf20Sopenharmony_ci q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); 7588c2ecf20Sopenharmony_ci q->perturb_period = 0; 7598c2ecf20Sopenharmony_ci get_random_bytes(&q->perturbation, sizeof(q->perturbation)); 7608c2ecf20Sopenharmony_ci 7618c2ecf20Sopenharmony_ci if (opt) { 7628c2ecf20Sopenharmony_ci int err = sfq_change(sch, opt); 7638c2ecf20Sopenharmony_ci if (err) 7648c2ecf20Sopenharmony_ci return err; 7658c2ecf20Sopenharmony_ci } 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_ci q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor); 7688c2ecf20Sopenharmony_ci q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows); 7698c2ecf20Sopenharmony_ci if (!q->ht || !q->slots) { 7708c2ecf20Sopenharmony_ci /* Note: sfq_destroy() will be called by our caller */ 7718c2ecf20Sopenharmony_ci return -ENOMEM; 7728c2ecf20Sopenharmony_ci } 7738c2ecf20Sopenharmony_ci 7748c2ecf20Sopenharmony_ci for (i = 0; i < q->divisor; i++) 7758c2ecf20Sopenharmony_ci q->ht[i] = SFQ_EMPTY_SLOT; 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_ci for (i = 0; i < q->maxflows; i++) { 7788c2ecf20Sopenharmony_ci slot_queue_init(&q->slots[i]); 7798c2ecf20Sopenharmony_ci sfq_link(q, i); 7808c2ecf20Sopenharmony_ci } 7818c2ecf20Sopenharmony_ci if (q->limit >= 1) 7828c2ecf20Sopenharmony_ci sch->flags |= TCQ_F_CAN_BYPASS; 7838c2ecf20Sopenharmony_ci else 7848c2ecf20Sopenharmony_ci sch->flags &= ~TCQ_F_CAN_BYPASS; 7858c2ecf20Sopenharmony_ci return 0; 7868c2ecf20Sopenharmony_ci} 7878c2ecf20Sopenharmony_ci 7888c2ecf20Sopenharmony_cistatic int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) 7898c2ecf20Sopenharmony_ci{ 7908c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 7918c2ecf20Sopenharmony_ci unsigned char *b = skb_tail_pointer(skb); 7928c2ecf20Sopenharmony_ci struct tc_sfq_qopt_v1 opt; 7938c2ecf20Sopenharmony_ci struct red_parms *p = q->red_parms; 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_ci memset(&opt, 0, sizeof(opt)); 7968c2ecf20Sopenharmony_ci opt.v0.quantum = q->quantum; 7978c2ecf20Sopenharmony_ci opt.v0.perturb_period = q->perturb_period / HZ; 7988c2ecf20Sopenharmony_ci opt.v0.limit = q->limit; 7998c2ecf20Sopenharmony_ci opt.v0.divisor = q->divisor; 8008c2ecf20Sopenharmony_ci opt.v0.flows = q->maxflows; 8018c2ecf20Sopenharmony_ci opt.depth = q->maxdepth; 8028c2ecf20Sopenharmony_ci opt.headdrop = q->headdrop; 8038c2ecf20Sopenharmony_ci 8048c2ecf20Sopenharmony_ci if (p) { 8058c2ecf20Sopenharmony_ci opt.qth_min = p->qth_min >> p->Wlog; 8068c2ecf20Sopenharmony_ci opt.qth_max = p->qth_max >> p->Wlog; 8078c2ecf20Sopenharmony_ci opt.Wlog = p->Wlog; 8088c2ecf20Sopenharmony_ci opt.Plog = p->Plog; 8098c2ecf20Sopenharmony_ci opt.Scell_log = p->Scell_log; 8108c2ecf20Sopenharmony_ci opt.max_P = p->max_P; 8118c2ecf20Sopenharmony_ci } 8128c2ecf20Sopenharmony_ci memcpy(&opt.stats, &q->stats, sizeof(opt.stats)); 8138c2ecf20Sopenharmony_ci opt.flags = q->flags; 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) 8168c2ecf20Sopenharmony_ci goto nla_put_failure; 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_ci return skb->len; 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_cinla_put_failure: 8218c2ecf20Sopenharmony_ci nlmsg_trim(skb, b); 8228c2ecf20Sopenharmony_ci return -1; 8238c2ecf20Sopenharmony_ci} 8248c2ecf20Sopenharmony_ci 8258c2ecf20Sopenharmony_cistatic struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg) 8268c2ecf20Sopenharmony_ci{ 8278c2ecf20Sopenharmony_ci return NULL; 8288c2ecf20Sopenharmony_ci} 8298c2ecf20Sopenharmony_ci 8308c2ecf20Sopenharmony_cistatic unsigned long sfq_find(struct Qdisc *sch, u32 classid) 8318c2ecf20Sopenharmony_ci{ 8328c2ecf20Sopenharmony_ci return 0; 8338c2ecf20Sopenharmony_ci} 8348c2ecf20Sopenharmony_ci 8358c2ecf20Sopenharmony_cistatic unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, 8368c2ecf20Sopenharmony_ci u32 classid) 8378c2ecf20Sopenharmony_ci{ 8388c2ecf20Sopenharmony_ci return 0; 8398c2ecf20Sopenharmony_ci} 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_cistatic void sfq_unbind(struct Qdisc *q, unsigned long cl) 8428c2ecf20Sopenharmony_ci{ 8438c2ecf20Sopenharmony_ci} 8448c2ecf20Sopenharmony_ci 8458c2ecf20Sopenharmony_cistatic struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl, 8468c2ecf20Sopenharmony_ci struct netlink_ext_ack *extack) 8478c2ecf20Sopenharmony_ci{ 8488c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 8498c2ecf20Sopenharmony_ci 8508c2ecf20Sopenharmony_ci if (cl) 8518c2ecf20Sopenharmony_ci return NULL; 8528c2ecf20Sopenharmony_ci return q->block; 8538c2ecf20Sopenharmony_ci} 8548c2ecf20Sopenharmony_ci 8558c2ecf20Sopenharmony_cistatic int sfq_dump_class(struct Qdisc *sch, unsigned long cl, 8568c2ecf20Sopenharmony_ci struct sk_buff *skb, struct tcmsg *tcm) 8578c2ecf20Sopenharmony_ci{ 8588c2ecf20Sopenharmony_ci tcm->tcm_handle |= TC_H_MIN(cl); 8598c2ecf20Sopenharmony_ci return 0; 8608c2ecf20Sopenharmony_ci} 8618c2ecf20Sopenharmony_ci 8628c2ecf20Sopenharmony_cistatic int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, 8638c2ecf20Sopenharmony_ci struct gnet_dump *d) 8648c2ecf20Sopenharmony_ci{ 8658c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 8668c2ecf20Sopenharmony_ci sfq_index idx = q->ht[cl - 1]; 8678c2ecf20Sopenharmony_ci struct gnet_stats_queue qs = { 0 }; 8688c2ecf20Sopenharmony_ci struct tc_sfq_xstats xstats = { 0 }; 8698c2ecf20Sopenharmony_ci 8708c2ecf20Sopenharmony_ci if (idx != SFQ_EMPTY_SLOT) { 8718c2ecf20Sopenharmony_ci const struct sfq_slot *slot = &q->slots[idx]; 8728c2ecf20Sopenharmony_ci 8738c2ecf20Sopenharmony_ci xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; 8748c2ecf20Sopenharmony_ci qs.qlen = slot->qlen; 8758c2ecf20Sopenharmony_ci qs.backlog = slot->backlog; 8768c2ecf20Sopenharmony_ci } 8778c2ecf20Sopenharmony_ci if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) 8788c2ecf20Sopenharmony_ci return -1; 8798c2ecf20Sopenharmony_ci return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); 8808c2ecf20Sopenharmony_ci} 8818c2ecf20Sopenharmony_ci 8828c2ecf20Sopenharmony_cistatic void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) 8838c2ecf20Sopenharmony_ci{ 8848c2ecf20Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 8858c2ecf20Sopenharmony_ci unsigned int i; 8868c2ecf20Sopenharmony_ci 8878c2ecf20Sopenharmony_ci if (arg->stop) 8888c2ecf20Sopenharmony_ci return; 8898c2ecf20Sopenharmony_ci 8908c2ecf20Sopenharmony_ci for (i = 0; i < q->divisor; i++) { 8918c2ecf20Sopenharmony_ci if (q->ht[i] == SFQ_EMPTY_SLOT || 8928c2ecf20Sopenharmony_ci arg->count < arg->skip) { 8938c2ecf20Sopenharmony_ci arg->count++; 8948c2ecf20Sopenharmony_ci continue; 8958c2ecf20Sopenharmony_ci } 8968c2ecf20Sopenharmony_ci if (arg->fn(sch, i + 1, arg) < 0) { 8978c2ecf20Sopenharmony_ci arg->stop = 1; 8988c2ecf20Sopenharmony_ci break; 8998c2ecf20Sopenharmony_ci } 9008c2ecf20Sopenharmony_ci arg->count++; 9018c2ecf20Sopenharmony_ci } 9028c2ecf20Sopenharmony_ci} 9038c2ecf20Sopenharmony_ci 9048c2ecf20Sopenharmony_cistatic const struct Qdisc_class_ops sfq_class_ops = { 9058c2ecf20Sopenharmony_ci .leaf = sfq_leaf, 9068c2ecf20Sopenharmony_ci .find = sfq_find, 9078c2ecf20Sopenharmony_ci .tcf_block = sfq_tcf_block, 9088c2ecf20Sopenharmony_ci .bind_tcf = sfq_bind, 9098c2ecf20Sopenharmony_ci .unbind_tcf = sfq_unbind, 9108c2ecf20Sopenharmony_ci .dump = sfq_dump_class, 9118c2ecf20Sopenharmony_ci .dump_stats = sfq_dump_class_stats, 9128c2ecf20Sopenharmony_ci .walk = sfq_walk, 9138c2ecf20Sopenharmony_ci}; 9148c2ecf20Sopenharmony_ci 9158c2ecf20Sopenharmony_cistatic struct Qdisc_ops sfq_qdisc_ops __read_mostly = { 9168c2ecf20Sopenharmony_ci .cl_ops = &sfq_class_ops, 9178c2ecf20Sopenharmony_ci .id = "sfq", 9188c2ecf20Sopenharmony_ci .priv_size = sizeof(struct sfq_sched_data), 9198c2ecf20Sopenharmony_ci .enqueue = sfq_enqueue, 9208c2ecf20Sopenharmony_ci .dequeue = sfq_dequeue, 9218c2ecf20Sopenharmony_ci .peek = qdisc_peek_dequeued, 9228c2ecf20Sopenharmony_ci .init = sfq_init, 9238c2ecf20Sopenharmony_ci .reset = sfq_reset, 9248c2ecf20Sopenharmony_ci .destroy = sfq_destroy, 9258c2ecf20Sopenharmony_ci .change = NULL, 9268c2ecf20Sopenharmony_ci .dump = sfq_dump, 9278c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 9288c2ecf20Sopenharmony_ci}; 9298c2ecf20Sopenharmony_ci 9308c2ecf20Sopenharmony_cistatic int __init sfq_module_init(void) 9318c2ecf20Sopenharmony_ci{ 9328c2ecf20Sopenharmony_ci return register_qdisc(&sfq_qdisc_ops); 9338c2ecf20Sopenharmony_ci} 9348c2ecf20Sopenharmony_cistatic void __exit sfq_module_exit(void) 9358c2ecf20Sopenharmony_ci{ 9368c2ecf20Sopenharmony_ci unregister_qdisc(&sfq_qdisc_ops); 9378c2ecf20Sopenharmony_ci} 9388c2ecf20Sopenharmony_cimodule_init(sfq_module_init) 9398c2ecf20Sopenharmony_cimodule_exit(sfq_module_exit) 9408c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 941