162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * net/sched/sch_sfq.c Stochastic Fairness Queueing discipline. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/module.h> 962306a36Sopenharmony_ci#include <linux/types.h> 1062306a36Sopenharmony_ci#include <linux/kernel.h> 1162306a36Sopenharmony_ci#include <linux/jiffies.h> 1262306a36Sopenharmony_ci#include <linux/string.h> 1362306a36Sopenharmony_ci#include <linux/in.h> 1462306a36Sopenharmony_ci#include <linux/errno.h> 1562306a36Sopenharmony_ci#include <linux/init.h> 1662306a36Sopenharmony_ci#include <linux/skbuff.h> 1762306a36Sopenharmony_ci#include <linux/siphash.h> 1862306a36Sopenharmony_ci#include <linux/slab.h> 1962306a36Sopenharmony_ci#include <linux/vmalloc.h> 2062306a36Sopenharmony_ci#include <net/netlink.h> 2162306a36Sopenharmony_ci#include <net/pkt_sched.h> 2262306a36Sopenharmony_ci#include <net/pkt_cls.h> 2362306a36Sopenharmony_ci#include <net/red.h> 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci/* Stochastic Fairness Queuing algorithm. 2762306a36Sopenharmony_ci ======================================= 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci Source: 3062306a36Sopenharmony_ci Paul E. McKenney "Stochastic Fairness Queuing", 3162306a36Sopenharmony_ci IEEE INFOCOMM'90 Proceedings, San Francisco, 1990. 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci Paul E. McKenney "Stochastic Fairness Queuing", 3462306a36Sopenharmony_ci "Interworking: Research and Experience", v.2, 1991, p.113-131. 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci See also: 3862306a36Sopenharmony_ci M. Shreedhar and George Varghese "Efficient Fair 3962306a36Sopenharmony_ci Queuing using Deficit Round Robin", Proc. SIGCOMM 95. 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci This is not the thing that is usually called (W)FQ nowadays. 4362306a36Sopenharmony_ci It does not use any timestamp mechanism, but instead 4462306a36Sopenharmony_ci processes queues in round-robin order. 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci ADVANTAGE: 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci - It is very cheap. Both CPU and memory requirements are minimal. 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci DRAWBACKS: 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci - "Stochastic" -> It is not 100% fair. 5362306a36Sopenharmony_ci When hash collisions occur, several flows are considered as one. 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci - "Round-robin" -> It introduces larger delays than virtual clock 5662306a36Sopenharmony_ci based schemes, and should not be used for isolating interactive 5762306a36Sopenharmony_ci traffic from non-interactive. It means, that this scheduler 5862306a36Sopenharmony_ci should be used as leaf of CBQ or P3, which put interactive traffic 5962306a36Sopenharmony_ci to higher priority band. 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci We still need true WFQ for top level CSZ, but using WFQ 6262306a36Sopenharmony_ci for the best effort traffic is absolutely pointless: 6362306a36Sopenharmony_ci SFQ is superior for this purpose. 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci IMPLEMENTATION: 6662306a36Sopenharmony_ci This implementation limits : 6762306a36Sopenharmony_ci - maximal queue length per flow to 127 packets. 6862306a36Sopenharmony_ci - max mtu to 2^18-1; 6962306a36Sopenharmony_ci - max 65408 flows, 7062306a36Sopenharmony_ci - number of hash buckets to 65536. 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci It is easy to increase these values, but not in flight. */ 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci#define SFQ_MAX_DEPTH 127 /* max number of packets per flow */ 7562306a36Sopenharmony_ci#define SFQ_DEFAULT_FLOWS 128 7662306a36Sopenharmony_ci#define SFQ_MAX_FLOWS (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */ 7762306a36Sopenharmony_ci#define SFQ_EMPTY_SLOT 0xffff 7862306a36Sopenharmony_ci#define SFQ_DEFAULT_HASH_DIVISOR 1024 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci/* We use 16 bits to store allot, and want to handle packets up to 64K 8162306a36Sopenharmony_ci * Scale allot by 8 (1<<3) so that no overflow occurs. 8262306a36Sopenharmony_ci */ 8362306a36Sopenharmony_ci#define SFQ_ALLOT_SHIFT 3 8462306a36Sopenharmony_ci#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */ 8762306a36Sopenharmony_citypedef u16 sfq_index; 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci/* 9062306a36Sopenharmony_ci * We dont use pointers to save space. 9162306a36Sopenharmony_ci * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array 9262306a36Sopenharmony_ci * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH] 9362306a36Sopenharmony_ci * are 'pointers' to dep[] array 9462306a36Sopenharmony_ci */ 9562306a36Sopenharmony_cistruct sfq_head { 9662306a36Sopenharmony_ci sfq_index next; 9762306a36Sopenharmony_ci sfq_index prev; 9862306a36Sopenharmony_ci}; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_cistruct sfq_slot { 10162306a36Sopenharmony_ci struct sk_buff *skblist_next; 10262306a36Sopenharmony_ci struct sk_buff *skblist_prev; 10362306a36Sopenharmony_ci sfq_index qlen; /* number of skbs in skblist */ 10462306a36Sopenharmony_ci sfq_index next; /* next slot in sfq RR chain */ 10562306a36Sopenharmony_ci struct sfq_head dep; /* anchor in dep[] chains */ 10662306a36Sopenharmony_ci unsigned short hash; /* hash value (index in ht[]) */ 10762306a36Sopenharmony_ci short allot; /* credit for this slot */ 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci unsigned int backlog; 11062306a36Sopenharmony_ci struct red_vars vars; 11162306a36Sopenharmony_ci}; 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_cistruct sfq_sched_data { 11462306a36Sopenharmony_ci/* frequently used fields */ 11562306a36Sopenharmony_ci int limit; /* limit of total number of packets in this qdisc */ 11662306a36Sopenharmony_ci unsigned int divisor; /* number of slots in hash table */ 11762306a36Sopenharmony_ci u8 headdrop; 11862306a36Sopenharmony_ci u8 maxdepth; /* limit of packets per flow */ 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci siphash_key_t perturbation; 12162306a36Sopenharmony_ci u8 cur_depth; /* depth of longest slot */ 12262306a36Sopenharmony_ci u8 flags; 12362306a36Sopenharmony_ci unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ 12462306a36Sopenharmony_ci struct tcf_proto __rcu *filter_list; 12562306a36Sopenharmony_ci struct tcf_block *block; 12662306a36Sopenharmony_ci sfq_index *ht; /* Hash table ('divisor' slots) */ 12762306a36Sopenharmony_ci struct sfq_slot *slots; /* Flows table ('maxflows' entries) */ 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci struct red_parms *red_parms; 13062306a36Sopenharmony_ci struct tc_sfqred_stats stats; 13162306a36Sopenharmony_ci struct sfq_slot *tail; /* current slot in round */ 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci struct sfq_head dep[SFQ_MAX_DEPTH + 1]; 13462306a36Sopenharmony_ci /* Linked lists of slots, indexed by depth 13562306a36Sopenharmony_ci * dep[0] : list of unused flows 13662306a36Sopenharmony_ci * dep[1] : list of flows with 1 packet 13762306a36Sopenharmony_ci * dep[X] : list of flows with X packets 13862306a36Sopenharmony_ci */ 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci unsigned int maxflows; /* number of flows in flows array */ 14162306a36Sopenharmony_ci int perturb_period; 14262306a36Sopenharmony_ci unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ 14362306a36Sopenharmony_ci struct timer_list perturb_timer; 14462306a36Sopenharmony_ci struct Qdisc *sch; 14562306a36Sopenharmony_ci}; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci/* 14862306a36Sopenharmony_ci * sfq_head are either in a sfq_slot or in dep[] array 14962306a36Sopenharmony_ci */ 15062306a36Sopenharmony_cistatic inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val) 15162306a36Sopenharmony_ci{ 15262306a36Sopenharmony_ci if (val < SFQ_MAX_FLOWS) 15362306a36Sopenharmony_ci return &q->slots[val].dep; 15462306a36Sopenharmony_ci return &q->dep[val - SFQ_MAX_FLOWS]; 15562306a36Sopenharmony_ci} 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_cistatic unsigned int sfq_hash(const struct sfq_sched_data *q, 15862306a36Sopenharmony_ci const struct sk_buff *skb) 15962306a36Sopenharmony_ci{ 16062306a36Sopenharmony_ci return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1); 16162306a36Sopenharmony_ci} 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_cistatic unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, 16462306a36Sopenharmony_ci int *qerr) 16562306a36Sopenharmony_ci{ 16662306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 16762306a36Sopenharmony_ci struct tcf_result res; 16862306a36Sopenharmony_ci struct tcf_proto *fl; 16962306a36Sopenharmony_ci int result; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci if (TC_H_MAJ(skb->priority) == sch->handle && 17262306a36Sopenharmony_ci TC_H_MIN(skb->priority) > 0 && 17362306a36Sopenharmony_ci TC_H_MIN(skb->priority) <= q->divisor) 17462306a36Sopenharmony_ci return TC_H_MIN(skb->priority); 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci fl = rcu_dereference_bh(q->filter_list); 17762306a36Sopenharmony_ci if (!fl) 17862306a36Sopenharmony_ci return sfq_hash(q, skb) + 1; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 18162306a36Sopenharmony_ci result = tcf_classify(skb, NULL, fl, &res, false); 18262306a36Sopenharmony_ci if (result >= 0) { 18362306a36Sopenharmony_ci#ifdef CONFIG_NET_CLS_ACT 18462306a36Sopenharmony_ci switch (result) { 18562306a36Sopenharmony_ci case TC_ACT_STOLEN: 18662306a36Sopenharmony_ci case TC_ACT_QUEUED: 18762306a36Sopenharmony_ci case TC_ACT_TRAP: 18862306a36Sopenharmony_ci *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 18962306a36Sopenharmony_ci fallthrough; 19062306a36Sopenharmony_ci case TC_ACT_SHOT: 19162306a36Sopenharmony_ci return 0; 19262306a36Sopenharmony_ci } 19362306a36Sopenharmony_ci#endif 19462306a36Sopenharmony_ci if (TC_H_MIN(res.classid) <= q->divisor) 19562306a36Sopenharmony_ci return TC_H_MIN(res.classid); 19662306a36Sopenharmony_ci } 19762306a36Sopenharmony_ci return 0; 19862306a36Sopenharmony_ci} 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci/* 20162306a36Sopenharmony_ci * x : slot number [0 .. SFQ_MAX_FLOWS - 1] 20262306a36Sopenharmony_ci */ 20362306a36Sopenharmony_cistatic inline void sfq_link(struct sfq_sched_data *q, sfq_index x) 20462306a36Sopenharmony_ci{ 20562306a36Sopenharmony_ci sfq_index p, n; 20662306a36Sopenharmony_ci struct sfq_slot *slot = &q->slots[x]; 20762306a36Sopenharmony_ci int qlen = slot->qlen; 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_ci p = qlen + SFQ_MAX_FLOWS; 21062306a36Sopenharmony_ci n = q->dep[qlen].next; 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci slot->dep.next = n; 21362306a36Sopenharmony_ci slot->dep.prev = p; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */ 21662306a36Sopenharmony_ci sfq_dep_head(q, n)->prev = x; 21762306a36Sopenharmony_ci} 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci#define sfq_unlink(q, x, n, p) \ 22062306a36Sopenharmony_ci do { \ 22162306a36Sopenharmony_ci n = q->slots[x].dep.next; \ 22262306a36Sopenharmony_ci p = q->slots[x].dep.prev; \ 22362306a36Sopenharmony_ci sfq_dep_head(q, p)->next = n; \ 22462306a36Sopenharmony_ci sfq_dep_head(q, n)->prev = p; \ 22562306a36Sopenharmony_ci } while (0) 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_cistatic inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) 22962306a36Sopenharmony_ci{ 23062306a36Sopenharmony_ci sfq_index p, n; 23162306a36Sopenharmony_ci int d; 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci sfq_unlink(q, x, n, p); 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci d = q->slots[x].qlen--; 23662306a36Sopenharmony_ci if (n == p && q->cur_depth == d) 23762306a36Sopenharmony_ci q->cur_depth--; 23862306a36Sopenharmony_ci sfq_link(q, x); 23962306a36Sopenharmony_ci} 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_cistatic inline void sfq_inc(struct sfq_sched_data *q, sfq_index x) 24262306a36Sopenharmony_ci{ 24362306a36Sopenharmony_ci sfq_index p, n; 24462306a36Sopenharmony_ci int d; 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci sfq_unlink(q, x, n, p); 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci d = ++q->slots[x].qlen; 24962306a36Sopenharmony_ci if (q->cur_depth < d) 25062306a36Sopenharmony_ci q->cur_depth = d; 25162306a36Sopenharmony_ci sfq_link(q, x); 25262306a36Sopenharmony_ci} 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci/* helper functions : might be changed when/if skb use a standard list_head */ 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci/* remove one skb from tail of slot queue */ 25762306a36Sopenharmony_cistatic inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot) 25862306a36Sopenharmony_ci{ 25962306a36Sopenharmony_ci struct sk_buff *skb = slot->skblist_prev; 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci slot->skblist_prev = skb->prev; 26262306a36Sopenharmony_ci skb->prev->next = (struct sk_buff *)slot; 26362306a36Sopenharmony_ci skb->next = skb->prev = NULL; 26462306a36Sopenharmony_ci return skb; 26562306a36Sopenharmony_ci} 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci/* remove one skb from head of slot queue */ 26862306a36Sopenharmony_cistatic inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot) 26962306a36Sopenharmony_ci{ 27062306a36Sopenharmony_ci struct sk_buff *skb = slot->skblist_next; 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci slot->skblist_next = skb->next; 27362306a36Sopenharmony_ci skb->next->prev = (struct sk_buff *)slot; 27462306a36Sopenharmony_ci skb->next = skb->prev = NULL; 27562306a36Sopenharmony_ci return skb; 27662306a36Sopenharmony_ci} 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_cistatic inline void slot_queue_init(struct sfq_slot *slot) 27962306a36Sopenharmony_ci{ 28062306a36Sopenharmony_ci memset(slot, 0, sizeof(*slot)); 28162306a36Sopenharmony_ci slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot; 28262306a36Sopenharmony_ci} 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci/* add skb to slot queue (tail add) */ 28562306a36Sopenharmony_cistatic inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci skb->prev = slot->skblist_prev; 28862306a36Sopenharmony_ci skb->next = (struct sk_buff *)slot; 28962306a36Sopenharmony_ci slot->skblist_prev->next = skb; 29062306a36Sopenharmony_ci slot->skblist_prev = skb; 29162306a36Sopenharmony_ci} 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_cistatic unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) 29462306a36Sopenharmony_ci{ 29562306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 29662306a36Sopenharmony_ci sfq_index x, d = q->cur_depth; 29762306a36Sopenharmony_ci struct sk_buff *skb; 29862306a36Sopenharmony_ci unsigned int len; 29962306a36Sopenharmony_ci struct sfq_slot *slot; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci /* Queue is full! Find the longest slot and drop tail packet from it */ 30262306a36Sopenharmony_ci if (d > 1) { 30362306a36Sopenharmony_ci x = q->dep[d].next; 30462306a36Sopenharmony_ci slot = &q->slots[x]; 30562306a36Sopenharmony_cidrop: 30662306a36Sopenharmony_ci skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot); 30762306a36Sopenharmony_ci len = qdisc_pkt_len(skb); 30862306a36Sopenharmony_ci slot->backlog -= len; 30962306a36Sopenharmony_ci sfq_dec(q, x); 31062306a36Sopenharmony_ci sch->q.qlen--; 31162306a36Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 31262306a36Sopenharmony_ci qdisc_drop(skb, sch, to_free); 31362306a36Sopenharmony_ci return len; 31462306a36Sopenharmony_ci } 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci if (d == 1) { 31762306a36Sopenharmony_ci /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ 31862306a36Sopenharmony_ci x = q->tail->next; 31962306a36Sopenharmony_ci slot = &q->slots[x]; 32062306a36Sopenharmony_ci q->tail->next = slot->next; 32162306a36Sopenharmony_ci q->ht[slot->hash] = SFQ_EMPTY_SLOT; 32262306a36Sopenharmony_ci goto drop; 32362306a36Sopenharmony_ci } 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci return 0; 32662306a36Sopenharmony_ci} 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci/* Is ECN parameter configured */ 32962306a36Sopenharmony_cistatic int sfq_prob_mark(const struct sfq_sched_data *q) 33062306a36Sopenharmony_ci{ 33162306a36Sopenharmony_ci return q->flags & TC_RED_ECN; 33262306a36Sopenharmony_ci} 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci/* Should packets over max threshold just be marked */ 33562306a36Sopenharmony_cistatic int sfq_hard_mark(const struct sfq_sched_data *q) 33662306a36Sopenharmony_ci{ 33762306a36Sopenharmony_ci return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN; 33862306a36Sopenharmony_ci} 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_cistatic int sfq_headdrop(const struct sfq_sched_data *q) 34162306a36Sopenharmony_ci{ 34262306a36Sopenharmony_ci return q->headdrop; 34362306a36Sopenharmony_ci} 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_cistatic int 34662306a36Sopenharmony_cisfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) 34762306a36Sopenharmony_ci{ 34862306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 34962306a36Sopenharmony_ci unsigned int hash, dropped; 35062306a36Sopenharmony_ci sfq_index x, qlen; 35162306a36Sopenharmony_ci struct sfq_slot *slot; 35262306a36Sopenharmony_ci int ret; 35362306a36Sopenharmony_ci struct sk_buff *head; 35462306a36Sopenharmony_ci int delta; 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci hash = sfq_classify(skb, sch, &ret); 35762306a36Sopenharmony_ci if (hash == 0) { 35862306a36Sopenharmony_ci if (ret & __NET_XMIT_BYPASS) 35962306a36Sopenharmony_ci qdisc_qstats_drop(sch); 36062306a36Sopenharmony_ci __qdisc_drop(skb, to_free); 36162306a36Sopenharmony_ci return ret; 36262306a36Sopenharmony_ci } 36362306a36Sopenharmony_ci hash--; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci x = q->ht[hash]; 36662306a36Sopenharmony_ci slot = &q->slots[x]; 36762306a36Sopenharmony_ci if (x == SFQ_EMPTY_SLOT) { 36862306a36Sopenharmony_ci x = q->dep[0].next; /* get a free slot */ 36962306a36Sopenharmony_ci if (x >= SFQ_MAX_FLOWS) 37062306a36Sopenharmony_ci return qdisc_drop(skb, sch, to_free); 37162306a36Sopenharmony_ci q->ht[hash] = x; 37262306a36Sopenharmony_ci slot = &q->slots[x]; 37362306a36Sopenharmony_ci slot->hash = hash; 37462306a36Sopenharmony_ci slot->backlog = 0; /* should already be 0 anyway... */ 37562306a36Sopenharmony_ci red_set_vars(&slot->vars); 37662306a36Sopenharmony_ci goto enqueue; 37762306a36Sopenharmony_ci } 37862306a36Sopenharmony_ci if (q->red_parms) { 37962306a36Sopenharmony_ci slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms, 38062306a36Sopenharmony_ci &slot->vars, 38162306a36Sopenharmony_ci slot->backlog); 38262306a36Sopenharmony_ci switch (red_action(q->red_parms, 38362306a36Sopenharmony_ci &slot->vars, 38462306a36Sopenharmony_ci slot->vars.qavg)) { 38562306a36Sopenharmony_ci case RED_DONT_MARK: 38662306a36Sopenharmony_ci break; 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci case RED_PROB_MARK: 38962306a36Sopenharmony_ci qdisc_qstats_overlimit(sch); 39062306a36Sopenharmony_ci if (sfq_prob_mark(q)) { 39162306a36Sopenharmony_ci /* We know we have at least one packet in queue */ 39262306a36Sopenharmony_ci if (sfq_headdrop(q) && 39362306a36Sopenharmony_ci INET_ECN_set_ce(slot->skblist_next)) { 39462306a36Sopenharmony_ci q->stats.prob_mark_head++; 39562306a36Sopenharmony_ci break; 39662306a36Sopenharmony_ci } 39762306a36Sopenharmony_ci if (INET_ECN_set_ce(skb)) { 39862306a36Sopenharmony_ci q->stats.prob_mark++; 39962306a36Sopenharmony_ci break; 40062306a36Sopenharmony_ci } 40162306a36Sopenharmony_ci } 40262306a36Sopenharmony_ci q->stats.prob_drop++; 40362306a36Sopenharmony_ci goto congestion_drop; 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci case RED_HARD_MARK: 40662306a36Sopenharmony_ci qdisc_qstats_overlimit(sch); 40762306a36Sopenharmony_ci if (sfq_hard_mark(q)) { 40862306a36Sopenharmony_ci /* We know we have at least one packet in queue */ 40962306a36Sopenharmony_ci if (sfq_headdrop(q) && 41062306a36Sopenharmony_ci INET_ECN_set_ce(slot->skblist_next)) { 41162306a36Sopenharmony_ci q->stats.forced_mark_head++; 41262306a36Sopenharmony_ci break; 41362306a36Sopenharmony_ci } 41462306a36Sopenharmony_ci if (INET_ECN_set_ce(skb)) { 41562306a36Sopenharmony_ci q->stats.forced_mark++; 41662306a36Sopenharmony_ci break; 41762306a36Sopenharmony_ci } 41862306a36Sopenharmony_ci } 41962306a36Sopenharmony_ci q->stats.forced_drop++; 42062306a36Sopenharmony_ci goto congestion_drop; 42162306a36Sopenharmony_ci } 42262306a36Sopenharmony_ci } 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci if (slot->qlen >= q->maxdepth) { 42562306a36Sopenharmony_cicongestion_drop: 42662306a36Sopenharmony_ci if (!sfq_headdrop(q)) 42762306a36Sopenharmony_ci return qdisc_drop(skb, sch, to_free); 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci /* We know we have at least one packet in queue */ 43062306a36Sopenharmony_ci head = slot_dequeue_head(slot); 43162306a36Sopenharmony_ci delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); 43262306a36Sopenharmony_ci sch->qstats.backlog -= delta; 43362306a36Sopenharmony_ci slot->backlog -= delta; 43462306a36Sopenharmony_ci qdisc_drop(head, sch, to_free); 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci slot_queue_add(slot, skb); 43762306a36Sopenharmony_ci qdisc_tree_reduce_backlog(sch, 0, delta); 43862306a36Sopenharmony_ci return NET_XMIT_CN; 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_cienqueue: 44262306a36Sopenharmony_ci qdisc_qstats_backlog_inc(sch, skb); 44362306a36Sopenharmony_ci slot->backlog += qdisc_pkt_len(skb); 44462306a36Sopenharmony_ci slot_queue_add(slot, skb); 44562306a36Sopenharmony_ci sfq_inc(q, x); 44662306a36Sopenharmony_ci if (slot->qlen == 1) { /* The flow is new */ 44762306a36Sopenharmony_ci if (q->tail == NULL) { /* It is the first flow */ 44862306a36Sopenharmony_ci slot->next = x; 44962306a36Sopenharmony_ci } else { 45062306a36Sopenharmony_ci slot->next = q->tail->next; 45162306a36Sopenharmony_ci q->tail->next = x; 45262306a36Sopenharmony_ci } 45362306a36Sopenharmony_ci /* We put this flow at the end of our flow list. 45462306a36Sopenharmony_ci * This might sound unfair for a new flow to wait after old ones, 45562306a36Sopenharmony_ci * but we could endup servicing new flows only, and freeze old ones. 45662306a36Sopenharmony_ci */ 45762306a36Sopenharmony_ci q->tail = slot; 45862306a36Sopenharmony_ci /* We could use a bigger initial quantum for new flows */ 45962306a36Sopenharmony_ci slot->allot = q->scaled_quantum; 46062306a36Sopenharmony_ci } 46162306a36Sopenharmony_ci if (++sch->q.qlen <= q->limit) 46262306a36Sopenharmony_ci return NET_XMIT_SUCCESS; 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_ci qlen = slot->qlen; 46562306a36Sopenharmony_ci dropped = sfq_drop(sch, to_free); 46662306a36Sopenharmony_ci /* Return Congestion Notification only if we dropped a packet 46762306a36Sopenharmony_ci * from this flow. 46862306a36Sopenharmony_ci */ 46962306a36Sopenharmony_ci if (qlen != slot->qlen) { 47062306a36Sopenharmony_ci qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb)); 47162306a36Sopenharmony_ci return NET_XMIT_CN; 47262306a36Sopenharmony_ci } 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci /* As we dropped a packet, better let upper stack know this */ 47562306a36Sopenharmony_ci qdisc_tree_reduce_backlog(sch, 1, dropped); 47662306a36Sopenharmony_ci return NET_XMIT_SUCCESS; 47762306a36Sopenharmony_ci} 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_cistatic struct sk_buff * 48062306a36Sopenharmony_cisfq_dequeue(struct Qdisc *sch) 48162306a36Sopenharmony_ci{ 48262306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 48362306a36Sopenharmony_ci struct sk_buff *skb; 48462306a36Sopenharmony_ci sfq_index a, next_a; 48562306a36Sopenharmony_ci struct sfq_slot *slot; 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci /* No active slots */ 48862306a36Sopenharmony_ci if (q->tail == NULL) 48962306a36Sopenharmony_ci return NULL; 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_cinext_slot: 49262306a36Sopenharmony_ci a = q->tail->next; 49362306a36Sopenharmony_ci slot = &q->slots[a]; 49462306a36Sopenharmony_ci if (slot->allot <= 0) { 49562306a36Sopenharmony_ci q->tail = slot; 49662306a36Sopenharmony_ci slot->allot += q->scaled_quantum; 49762306a36Sopenharmony_ci goto next_slot; 49862306a36Sopenharmony_ci } 49962306a36Sopenharmony_ci skb = slot_dequeue_head(slot); 50062306a36Sopenharmony_ci sfq_dec(q, a); 50162306a36Sopenharmony_ci qdisc_bstats_update(sch, skb); 50262306a36Sopenharmony_ci sch->q.qlen--; 50362306a36Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 50462306a36Sopenharmony_ci slot->backlog -= qdisc_pkt_len(skb); 50562306a36Sopenharmony_ci /* Is the slot empty? */ 50662306a36Sopenharmony_ci if (slot->qlen == 0) { 50762306a36Sopenharmony_ci q->ht[slot->hash] = SFQ_EMPTY_SLOT; 50862306a36Sopenharmony_ci next_a = slot->next; 50962306a36Sopenharmony_ci if (a == next_a) { 51062306a36Sopenharmony_ci q->tail = NULL; /* no more active slots */ 51162306a36Sopenharmony_ci return skb; 51262306a36Sopenharmony_ci } 51362306a36Sopenharmony_ci q->tail->next = next_a; 51462306a36Sopenharmony_ci } else { 51562306a36Sopenharmony_ci slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb)); 51662306a36Sopenharmony_ci } 51762306a36Sopenharmony_ci return skb; 51862306a36Sopenharmony_ci} 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_cistatic void 52162306a36Sopenharmony_cisfq_reset(struct Qdisc *sch) 52262306a36Sopenharmony_ci{ 52362306a36Sopenharmony_ci struct sk_buff *skb; 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci while ((skb = sfq_dequeue(sch)) != NULL) 52662306a36Sopenharmony_ci rtnl_kfree_skbs(skb, skb); 52762306a36Sopenharmony_ci} 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci/* 53062306a36Sopenharmony_ci * When q->perturbation is changed, we rehash all queued skbs 53162306a36Sopenharmony_ci * to avoid OOO (Out Of Order) effects. 53262306a36Sopenharmony_ci * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change 53362306a36Sopenharmony_ci * counters. 53462306a36Sopenharmony_ci */ 53562306a36Sopenharmony_cistatic void sfq_rehash(struct Qdisc *sch) 53662306a36Sopenharmony_ci{ 53762306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 53862306a36Sopenharmony_ci struct sk_buff *skb; 53962306a36Sopenharmony_ci int i; 54062306a36Sopenharmony_ci struct sfq_slot *slot; 54162306a36Sopenharmony_ci struct sk_buff_head list; 54262306a36Sopenharmony_ci int dropped = 0; 54362306a36Sopenharmony_ci unsigned int drop_len = 0; 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci __skb_queue_head_init(&list); 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci for (i = 0; i < q->maxflows; i++) { 54862306a36Sopenharmony_ci slot = &q->slots[i]; 54962306a36Sopenharmony_ci if (!slot->qlen) 55062306a36Sopenharmony_ci continue; 55162306a36Sopenharmony_ci while (slot->qlen) { 55262306a36Sopenharmony_ci skb = slot_dequeue_head(slot); 55362306a36Sopenharmony_ci sfq_dec(q, i); 55462306a36Sopenharmony_ci __skb_queue_tail(&list, skb); 55562306a36Sopenharmony_ci } 55662306a36Sopenharmony_ci slot->backlog = 0; 55762306a36Sopenharmony_ci red_set_vars(&slot->vars); 55862306a36Sopenharmony_ci q->ht[slot->hash] = SFQ_EMPTY_SLOT; 55962306a36Sopenharmony_ci } 56062306a36Sopenharmony_ci q->tail = NULL; 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci while ((skb = __skb_dequeue(&list)) != NULL) { 56362306a36Sopenharmony_ci unsigned int hash = sfq_hash(q, skb); 56462306a36Sopenharmony_ci sfq_index x = q->ht[hash]; 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci slot = &q->slots[x]; 56762306a36Sopenharmony_ci if (x == SFQ_EMPTY_SLOT) { 56862306a36Sopenharmony_ci x = q->dep[0].next; /* get a free slot */ 56962306a36Sopenharmony_ci if (x >= SFQ_MAX_FLOWS) { 57062306a36Sopenharmony_cidrop: 57162306a36Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 57262306a36Sopenharmony_ci drop_len += qdisc_pkt_len(skb); 57362306a36Sopenharmony_ci kfree_skb(skb); 57462306a36Sopenharmony_ci dropped++; 57562306a36Sopenharmony_ci continue; 57662306a36Sopenharmony_ci } 57762306a36Sopenharmony_ci q->ht[hash] = x; 57862306a36Sopenharmony_ci slot = &q->slots[x]; 57962306a36Sopenharmony_ci slot->hash = hash; 58062306a36Sopenharmony_ci } 58162306a36Sopenharmony_ci if (slot->qlen >= q->maxdepth) 58262306a36Sopenharmony_ci goto drop; 58362306a36Sopenharmony_ci slot_queue_add(slot, skb); 58462306a36Sopenharmony_ci if (q->red_parms) 58562306a36Sopenharmony_ci slot->vars.qavg = red_calc_qavg(q->red_parms, 58662306a36Sopenharmony_ci &slot->vars, 58762306a36Sopenharmony_ci slot->backlog); 58862306a36Sopenharmony_ci slot->backlog += qdisc_pkt_len(skb); 58962306a36Sopenharmony_ci sfq_inc(q, x); 59062306a36Sopenharmony_ci if (slot->qlen == 1) { /* The flow is new */ 59162306a36Sopenharmony_ci if (q->tail == NULL) { /* It is the first flow */ 59262306a36Sopenharmony_ci slot->next = x; 59362306a36Sopenharmony_ci } else { 59462306a36Sopenharmony_ci slot->next = q->tail->next; 59562306a36Sopenharmony_ci q->tail->next = x; 59662306a36Sopenharmony_ci } 59762306a36Sopenharmony_ci q->tail = slot; 59862306a36Sopenharmony_ci slot->allot = q->scaled_quantum; 59962306a36Sopenharmony_ci } 60062306a36Sopenharmony_ci } 60162306a36Sopenharmony_ci sch->q.qlen -= dropped; 60262306a36Sopenharmony_ci qdisc_tree_reduce_backlog(sch, dropped, drop_len); 60362306a36Sopenharmony_ci} 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_cistatic void sfq_perturbation(struct timer_list *t) 60662306a36Sopenharmony_ci{ 60762306a36Sopenharmony_ci struct sfq_sched_data *q = from_timer(q, t, perturb_timer); 60862306a36Sopenharmony_ci struct Qdisc *sch = q->sch; 60962306a36Sopenharmony_ci spinlock_t *root_lock; 61062306a36Sopenharmony_ci siphash_key_t nkey; 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci get_random_bytes(&nkey, sizeof(nkey)); 61362306a36Sopenharmony_ci rcu_read_lock(); 61462306a36Sopenharmony_ci root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 61562306a36Sopenharmony_ci spin_lock(root_lock); 61662306a36Sopenharmony_ci q->perturbation = nkey; 61762306a36Sopenharmony_ci if (!q->filter_list && q->tail) 61862306a36Sopenharmony_ci sfq_rehash(sch); 61962306a36Sopenharmony_ci spin_unlock(root_lock); 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci if (q->perturb_period) 62262306a36Sopenharmony_ci mod_timer(&q->perturb_timer, jiffies + q->perturb_period); 62362306a36Sopenharmony_ci rcu_read_unlock(); 62462306a36Sopenharmony_ci} 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_cistatic int sfq_change(struct Qdisc *sch, struct nlattr *opt) 62762306a36Sopenharmony_ci{ 62862306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 62962306a36Sopenharmony_ci struct tc_sfq_qopt *ctl = nla_data(opt); 63062306a36Sopenharmony_ci struct tc_sfq_qopt_v1 *ctl_v1 = NULL; 63162306a36Sopenharmony_ci unsigned int qlen, dropped = 0; 63262306a36Sopenharmony_ci struct red_parms *p = NULL; 63362306a36Sopenharmony_ci struct sk_buff *to_free = NULL; 63462306a36Sopenharmony_ci struct sk_buff *tail = NULL; 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci if (opt->nla_len < nla_attr_size(sizeof(*ctl))) 63762306a36Sopenharmony_ci return -EINVAL; 63862306a36Sopenharmony_ci if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1))) 63962306a36Sopenharmony_ci ctl_v1 = nla_data(opt); 64062306a36Sopenharmony_ci if (ctl->divisor && 64162306a36Sopenharmony_ci (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) 64262306a36Sopenharmony_ci return -EINVAL; 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_ci /* slot->allot is a short, make sure quantum is not too big. */ 64562306a36Sopenharmony_ci if (ctl->quantum) { 64662306a36Sopenharmony_ci unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum); 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci if (scaled <= 0 || scaled > SHRT_MAX) 64962306a36Sopenharmony_ci return -EINVAL; 65062306a36Sopenharmony_ci } 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, 65362306a36Sopenharmony_ci ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) 65462306a36Sopenharmony_ci return -EINVAL; 65562306a36Sopenharmony_ci if (ctl_v1 && ctl_v1->qth_min) { 65662306a36Sopenharmony_ci p = kmalloc(sizeof(*p), GFP_KERNEL); 65762306a36Sopenharmony_ci if (!p) 65862306a36Sopenharmony_ci return -ENOMEM; 65962306a36Sopenharmony_ci } 66062306a36Sopenharmony_ci sch_tree_lock(sch); 66162306a36Sopenharmony_ci if (ctl->quantum) { 66262306a36Sopenharmony_ci q->quantum = ctl->quantum; 66362306a36Sopenharmony_ci q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); 66462306a36Sopenharmony_ci } 66562306a36Sopenharmony_ci q->perturb_period = ctl->perturb_period * HZ; 66662306a36Sopenharmony_ci if (ctl->flows) 66762306a36Sopenharmony_ci q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); 66862306a36Sopenharmony_ci if (ctl->divisor) { 66962306a36Sopenharmony_ci q->divisor = ctl->divisor; 67062306a36Sopenharmony_ci q->maxflows = min_t(u32, q->maxflows, q->divisor); 67162306a36Sopenharmony_ci } 67262306a36Sopenharmony_ci if (ctl_v1) { 67362306a36Sopenharmony_ci if (ctl_v1->depth) 67462306a36Sopenharmony_ci q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); 67562306a36Sopenharmony_ci if (p) { 67662306a36Sopenharmony_ci swap(q->red_parms, p); 67762306a36Sopenharmony_ci red_set_parms(q->red_parms, 67862306a36Sopenharmony_ci ctl_v1->qth_min, ctl_v1->qth_max, 67962306a36Sopenharmony_ci ctl_v1->Wlog, 68062306a36Sopenharmony_ci ctl_v1->Plog, ctl_v1->Scell_log, 68162306a36Sopenharmony_ci NULL, 68262306a36Sopenharmony_ci ctl_v1->max_P); 68362306a36Sopenharmony_ci } 68462306a36Sopenharmony_ci q->flags = ctl_v1->flags; 68562306a36Sopenharmony_ci q->headdrop = ctl_v1->headdrop; 68662306a36Sopenharmony_ci } 68762306a36Sopenharmony_ci if (ctl->limit) { 68862306a36Sopenharmony_ci q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows); 68962306a36Sopenharmony_ci q->maxflows = min_t(u32, q->maxflows, q->limit); 69062306a36Sopenharmony_ci } 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ci qlen = sch->q.qlen; 69362306a36Sopenharmony_ci while (sch->q.qlen > q->limit) { 69462306a36Sopenharmony_ci dropped += sfq_drop(sch, &to_free); 69562306a36Sopenharmony_ci if (!tail) 69662306a36Sopenharmony_ci tail = to_free; 69762306a36Sopenharmony_ci } 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci rtnl_kfree_skbs(to_free, tail); 70062306a36Sopenharmony_ci qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci del_timer(&q->perturb_timer); 70362306a36Sopenharmony_ci if (q->perturb_period) { 70462306a36Sopenharmony_ci mod_timer(&q->perturb_timer, jiffies + q->perturb_period); 70562306a36Sopenharmony_ci get_random_bytes(&q->perturbation, sizeof(q->perturbation)); 70662306a36Sopenharmony_ci } 70762306a36Sopenharmony_ci sch_tree_unlock(sch); 70862306a36Sopenharmony_ci kfree(p); 70962306a36Sopenharmony_ci return 0; 71062306a36Sopenharmony_ci} 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_cistatic void *sfq_alloc(size_t sz) 71362306a36Sopenharmony_ci{ 71462306a36Sopenharmony_ci return kvmalloc(sz, GFP_KERNEL); 71562306a36Sopenharmony_ci} 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_cistatic void sfq_free(void *addr) 71862306a36Sopenharmony_ci{ 71962306a36Sopenharmony_ci kvfree(addr); 72062306a36Sopenharmony_ci} 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_cistatic void sfq_destroy(struct Qdisc *sch) 72362306a36Sopenharmony_ci{ 72462306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ci tcf_block_put(q->block); 72762306a36Sopenharmony_ci q->perturb_period = 0; 72862306a36Sopenharmony_ci del_timer_sync(&q->perturb_timer); 72962306a36Sopenharmony_ci sfq_free(q->ht); 73062306a36Sopenharmony_ci sfq_free(q->slots); 73162306a36Sopenharmony_ci kfree(q->red_parms); 73262306a36Sopenharmony_ci} 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_cistatic int sfq_init(struct Qdisc *sch, struct nlattr *opt, 73562306a36Sopenharmony_ci struct netlink_ext_ack *extack) 73662306a36Sopenharmony_ci{ 73762306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 73862306a36Sopenharmony_ci int i; 73962306a36Sopenharmony_ci int err; 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci q->sch = sch; 74262306a36Sopenharmony_ci timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci err = tcf_block_get(&q->block, &q->filter_list, sch, extack); 74562306a36Sopenharmony_ci if (err) 74662306a36Sopenharmony_ci return err; 74762306a36Sopenharmony_ci 74862306a36Sopenharmony_ci for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) { 74962306a36Sopenharmony_ci q->dep[i].next = i + SFQ_MAX_FLOWS; 75062306a36Sopenharmony_ci q->dep[i].prev = i + SFQ_MAX_FLOWS; 75162306a36Sopenharmony_ci } 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci q->limit = SFQ_MAX_DEPTH; 75462306a36Sopenharmony_ci q->maxdepth = SFQ_MAX_DEPTH; 75562306a36Sopenharmony_ci q->cur_depth = 0; 75662306a36Sopenharmony_ci q->tail = NULL; 75762306a36Sopenharmony_ci q->divisor = SFQ_DEFAULT_HASH_DIVISOR; 75862306a36Sopenharmony_ci q->maxflows = SFQ_DEFAULT_FLOWS; 75962306a36Sopenharmony_ci q->quantum = psched_mtu(qdisc_dev(sch)); 76062306a36Sopenharmony_ci q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); 76162306a36Sopenharmony_ci q->perturb_period = 0; 76262306a36Sopenharmony_ci get_random_bytes(&q->perturbation, sizeof(q->perturbation)); 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci if (opt) { 76562306a36Sopenharmony_ci int err = sfq_change(sch, opt); 76662306a36Sopenharmony_ci if (err) 76762306a36Sopenharmony_ci return err; 76862306a36Sopenharmony_ci } 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor); 77162306a36Sopenharmony_ci q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows); 77262306a36Sopenharmony_ci if (!q->ht || !q->slots) { 77362306a36Sopenharmony_ci /* Note: sfq_destroy() will be called by our caller */ 77462306a36Sopenharmony_ci return -ENOMEM; 77562306a36Sopenharmony_ci } 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci for (i = 0; i < q->divisor; i++) 77862306a36Sopenharmony_ci q->ht[i] = SFQ_EMPTY_SLOT; 77962306a36Sopenharmony_ci 78062306a36Sopenharmony_ci for (i = 0; i < q->maxflows; i++) { 78162306a36Sopenharmony_ci slot_queue_init(&q->slots[i]); 78262306a36Sopenharmony_ci sfq_link(q, i); 78362306a36Sopenharmony_ci } 78462306a36Sopenharmony_ci if (q->limit >= 1) 78562306a36Sopenharmony_ci sch->flags |= TCQ_F_CAN_BYPASS; 78662306a36Sopenharmony_ci else 78762306a36Sopenharmony_ci sch->flags &= ~TCQ_F_CAN_BYPASS; 78862306a36Sopenharmony_ci return 0; 78962306a36Sopenharmony_ci} 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_cistatic int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) 79262306a36Sopenharmony_ci{ 79362306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 79462306a36Sopenharmony_ci unsigned char *b = skb_tail_pointer(skb); 79562306a36Sopenharmony_ci struct tc_sfq_qopt_v1 opt; 79662306a36Sopenharmony_ci struct red_parms *p = q->red_parms; 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci memset(&opt, 0, sizeof(opt)); 79962306a36Sopenharmony_ci opt.v0.quantum = q->quantum; 80062306a36Sopenharmony_ci opt.v0.perturb_period = q->perturb_period / HZ; 80162306a36Sopenharmony_ci opt.v0.limit = q->limit; 80262306a36Sopenharmony_ci opt.v0.divisor = q->divisor; 80362306a36Sopenharmony_ci opt.v0.flows = q->maxflows; 80462306a36Sopenharmony_ci opt.depth = q->maxdepth; 80562306a36Sopenharmony_ci opt.headdrop = q->headdrop; 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci if (p) { 80862306a36Sopenharmony_ci opt.qth_min = p->qth_min >> p->Wlog; 80962306a36Sopenharmony_ci opt.qth_max = p->qth_max >> p->Wlog; 81062306a36Sopenharmony_ci opt.Wlog = p->Wlog; 81162306a36Sopenharmony_ci opt.Plog = p->Plog; 81262306a36Sopenharmony_ci opt.Scell_log = p->Scell_log; 81362306a36Sopenharmony_ci opt.max_P = p->max_P; 81462306a36Sopenharmony_ci } 81562306a36Sopenharmony_ci memcpy(&opt.stats, &q->stats, sizeof(opt.stats)); 81662306a36Sopenharmony_ci opt.flags = q->flags; 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) 81962306a36Sopenharmony_ci goto nla_put_failure; 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci return skb->len; 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_cinla_put_failure: 82462306a36Sopenharmony_ci nlmsg_trim(skb, b); 82562306a36Sopenharmony_ci return -1; 82662306a36Sopenharmony_ci} 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_cistatic struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg) 82962306a36Sopenharmony_ci{ 83062306a36Sopenharmony_ci return NULL; 83162306a36Sopenharmony_ci} 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_cistatic unsigned long sfq_find(struct Qdisc *sch, u32 classid) 83462306a36Sopenharmony_ci{ 83562306a36Sopenharmony_ci return 0; 83662306a36Sopenharmony_ci} 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_cistatic unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, 83962306a36Sopenharmony_ci u32 classid) 84062306a36Sopenharmony_ci{ 84162306a36Sopenharmony_ci return 0; 84262306a36Sopenharmony_ci} 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_cistatic void sfq_unbind(struct Qdisc *q, unsigned long cl) 84562306a36Sopenharmony_ci{ 84662306a36Sopenharmony_ci} 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_cistatic struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl, 84962306a36Sopenharmony_ci struct netlink_ext_ack *extack) 85062306a36Sopenharmony_ci{ 85162306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci if (cl) 85462306a36Sopenharmony_ci return NULL; 85562306a36Sopenharmony_ci return q->block; 85662306a36Sopenharmony_ci} 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_cistatic int sfq_dump_class(struct Qdisc *sch, unsigned long cl, 85962306a36Sopenharmony_ci struct sk_buff *skb, struct tcmsg *tcm) 86062306a36Sopenharmony_ci{ 86162306a36Sopenharmony_ci tcm->tcm_handle |= TC_H_MIN(cl); 86262306a36Sopenharmony_ci return 0; 86362306a36Sopenharmony_ci} 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_cistatic int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, 86662306a36Sopenharmony_ci struct gnet_dump *d) 86762306a36Sopenharmony_ci{ 86862306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 86962306a36Sopenharmony_ci sfq_index idx = q->ht[cl - 1]; 87062306a36Sopenharmony_ci struct gnet_stats_queue qs = { 0 }; 87162306a36Sopenharmony_ci struct tc_sfq_xstats xstats = { 0 }; 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_ci if (idx != SFQ_EMPTY_SLOT) { 87462306a36Sopenharmony_ci const struct sfq_slot *slot = &q->slots[idx]; 87562306a36Sopenharmony_ci 87662306a36Sopenharmony_ci xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; 87762306a36Sopenharmony_ci qs.qlen = slot->qlen; 87862306a36Sopenharmony_ci qs.backlog = slot->backlog; 87962306a36Sopenharmony_ci } 88062306a36Sopenharmony_ci if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) 88162306a36Sopenharmony_ci return -1; 88262306a36Sopenharmony_ci return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); 88362306a36Sopenharmony_ci} 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_cistatic void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) 88662306a36Sopenharmony_ci{ 88762306a36Sopenharmony_ci struct sfq_sched_data *q = qdisc_priv(sch); 88862306a36Sopenharmony_ci unsigned int i; 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci if (arg->stop) 89162306a36Sopenharmony_ci return; 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci for (i = 0; i < q->divisor; i++) { 89462306a36Sopenharmony_ci if (q->ht[i] == SFQ_EMPTY_SLOT) { 89562306a36Sopenharmony_ci arg->count++; 89662306a36Sopenharmony_ci continue; 89762306a36Sopenharmony_ci } 89862306a36Sopenharmony_ci if (!tc_qdisc_stats_dump(sch, i + 1, arg)) 89962306a36Sopenharmony_ci break; 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci} 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_cistatic const struct Qdisc_class_ops sfq_class_ops = { 90462306a36Sopenharmony_ci .leaf = sfq_leaf, 90562306a36Sopenharmony_ci .find = sfq_find, 90662306a36Sopenharmony_ci .tcf_block = sfq_tcf_block, 90762306a36Sopenharmony_ci .bind_tcf = sfq_bind, 90862306a36Sopenharmony_ci .unbind_tcf = sfq_unbind, 90962306a36Sopenharmony_ci .dump = sfq_dump_class, 91062306a36Sopenharmony_ci .dump_stats = sfq_dump_class_stats, 91162306a36Sopenharmony_ci .walk = sfq_walk, 91262306a36Sopenharmony_ci}; 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_cistatic struct Qdisc_ops sfq_qdisc_ops __read_mostly = { 91562306a36Sopenharmony_ci .cl_ops = &sfq_class_ops, 91662306a36Sopenharmony_ci .id = "sfq", 91762306a36Sopenharmony_ci .priv_size = sizeof(struct sfq_sched_data), 91862306a36Sopenharmony_ci .enqueue = sfq_enqueue, 91962306a36Sopenharmony_ci .dequeue = sfq_dequeue, 92062306a36Sopenharmony_ci .peek = qdisc_peek_dequeued, 92162306a36Sopenharmony_ci .init = sfq_init, 92262306a36Sopenharmony_ci .reset = sfq_reset, 92362306a36Sopenharmony_ci .destroy = sfq_destroy, 92462306a36Sopenharmony_ci .change = NULL, 92562306a36Sopenharmony_ci .dump = sfq_dump, 92662306a36Sopenharmony_ci .owner = THIS_MODULE, 92762306a36Sopenharmony_ci}; 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_cistatic int __init sfq_module_init(void) 93062306a36Sopenharmony_ci{ 93162306a36Sopenharmony_ci return register_qdisc(&sfq_qdisc_ops); 93262306a36Sopenharmony_ci} 93362306a36Sopenharmony_cistatic void __exit sfq_module_exit(void) 93462306a36Sopenharmony_ci{ 93562306a36Sopenharmony_ci unregister_qdisc(&sfq_qdisc_ops); 93662306a36Sopenharmony_ci} 93762306a36Sopenharmony_cimodule_init(sfq_module_init) 93862306a36Sopenharmony_cimodule_exit(sfq_module_exit) 93962306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 940