162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * net/sched/sch_sfb.c Stochastic Fair Blue 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr> 662306a36Sopenharmony_ci * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com> 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue: 962306a36Sopenharmony_ci * A New Class of Active Queue Management Algorithms. 1062306a36Sopenharmony_ci * U. Michigan CSE-TR-387-99, April 1999. 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf 1362306a36Sopenharmony_ci */ 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#include <linux/module.h> 1662306a36Sopenharmony_ci#include <linux/types.h> 1762306a36Sopenharmony_ci#include <linux/kernel.h> 1862306a36Sopenharmony_ci#include <linux/errno.h> 1962306a36Sopenharmony_ci#include <linux/skbuff.h> 2062306a36Sopenharmony_ci#include <linux/random.h> 2162306a36Sopenharmony_ci#include <linux/siphash.h> 2262306a36Sopenharmony_ci#include <net/ip.h> 2362306a36Sopenharmony_ci#include <net/pkt_sched.h> 2462306a36Sopenharmony_ci#include <net/pkt_cls.h> 2562306a36Sopenharmony_ci#include <net/inet_ecn.h> 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci/* 2862306a36Sopenharmony_ci * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) 2962306a36Sopenharmony_ci * This implementation uses L = 8 and N = 16 3062306a36Sopenharmony_ci * This permits us to split one 32bit hash (provided per packet by rxhash or 3162306a36Sopenharmony_ci * external classifier) into 8 subhashes of 4 bits. 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci#define SFB_BUCKET_SHIFT 4 3462306a36Sopenharmony_ci#define SFB_NUMBUCKETS (1 << SFB_BUCKET_SHIFT) /* N bins per Level */ 3562306a36Sopenharmony_ci#define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1) 3662306a36Sopenharmony_ci#define SFB_LEVELS (32 / SFB_BUCKET_SHIFT) /* L */ 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci/* SFB algo uses a virtual queue, named "bin" */ 3962306a36Sopenharmony_cistruct sfb_bucket { 4062306a36Sopenharmony_ci u16 qlen; /* length of virtual queue */ 4162306a36Sopenharmony_ci u16 p_mark; /* marking probability */ 4262306a36Sopenharmony_ci}; 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci/* We use a double buffering right before hash change 4562306a36Sopenharmony_ci * (Section 4.4 of SFB reference : moving hash functions) 4662306a36Sopenharmony_ci */ 4762306a36Sopenharmony_cistruct sfb_bins { 4862306a36Sopenharmony_ci siphash_key_t perturbation; /* siphash key */ 4962306a36Sopenharmony_ci struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; 5062306a36Sopenharmony_ci}; 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_cistruct sfb_sched_data { 5362306a36Sopenharmony_ci struct Qdisc *qdisc; 5462306a36Sopenharmony_ci struct tcf_proto __rcu *filter_list; 5562306a36Sopenharmony_ci struct tcf_block *block; 5662306a36Sopenharmony_ci unsigned long rehash_interval; 5762306a36Sopenharmony_ci unsigned long warmup_time; /* double buffering warmup time in jiffies */ 5862306a36Sopenharmony_ci u32 max; 5962306a36Sopenharmony_ci u32 bin_size; /* maximum queue length per bin */ 6062306a36Sopenharmony_ci u32 increment; /* d1 */ 6162306a36Sopenharmony_ci u32 decrement; /* d2 */ 6262306a36Sopenharmony_ci u32 limit; /* HARD maximal queue length */ 6362306a36Sopenharmony_ci u32 penalty_rate; 6462306a36Sopenharmony_ci u32 penalty_burst; 6562306a36Sopenharmony_ci u32 tokens_avail; 6662306a36Sopenharmony_ci unsigned long rehash_time; 6762306a36Sopenharmony_ci unsigned long token_time; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci u8 slot; /* current active bins (0 or 1) */ 7062306a36Sopenharmony_ci bool double_buffering; 7162306a36Sopenharmony_ci struct sfb_bins bins[2]; 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci struct { 7462306a36Sopenharmony_ci u32 earlydrop; 7562306a36Sopenharmony_ci u32 penaltydrop; 7662306a36Sopenharmony_ci u32 bucketdrop; 7762306a36Sopenharmony_ci u32 queuedrop; 7862306a36Sopenharmony_ci u32 childdrop; /* drops in child qdisc */ 7962306a36Sopenharmony_ci u32 marked; /* ECN mark */ 8062306a36Sopenharmony_ci } stats; 8162306a36Sopenharmony_ci}; 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci/* 8462306a36Sopenharmony_ci * Each queued skb might be hashed on one or two bins 8562306a36Sopenharmony_ci * We store in skb_cb the two hash values. 8662306a36Sopenharmony_ci * (A zero value means double buffering was not used) 8762306a36Sopenharmony_ci */ 8862306a36Sopenharmony_cistruct sfb_skb_cb { 8962306a36Sopenharmony_ci u32 hashes[2]; 9062306a36Sopenharmony_ci}; 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_cistatic inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb) 9362306a36Sopenharmony_ci{ 9462306a36Sopenharmony_ci qdisc_cb_private_validate(skb, sizeof(struct sfb_skb_cb)); 9562306a36Sopenharmony_ci return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data; 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci/* 9962306a36Sopenharmony_ci * If using 'internal' SFB flow classifier, hash comes from skb rxhash 10062306a36Sopenharmony_ci * If using external classifier, hash comes from the classid. 10162306a36Sopenharmony_ci */ 10262306a36Sopenharmony_cistatic u32 sfb_hash(const struct sk_buff *skb, u32 slot) 10362306a36Sopenharmony_ci{ 10462306a36Sopenharmony_ci return sfb_skb_cb(skb)->hashes[slot]; 10562306a36Sopenharmony_ci} 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci/* Probabilities are coded as Q0.16 fixed-point values, 10862306a36Sopenharmony_ci * with 0xFFFF representing 65535/65536 (almost 1.0) 10962306a36Sopenharmony_ci * Addition and subtraction are saturating in [0, 65535] 11062306a36Sopenharmony_ci */ 11162306a36Sopenharmony_cistatic u32 prob_plus(u32 p1, u32 p2) 11262306a36Sopenharmony_ci{ 11362306a36Sopenharmony_ci u32 res = p1 + p2; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci return min_t(u32, res, SFB_MAX_PROB); 11662306a36Sopenharmony_ci} 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_cistatic u32 prob_minus(u32 p1, u32 p2) 11962306a36Sopenharmony_ci{ 12062306a36Sopenharmony_ci return p1 > p2 ? p1 - p2 : 0; 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_cistatic void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) 12462306a36Sopenharmony_ci{ 12562306a36Sopenharmony_ci int i; 12662306a36Sopenharmony_ci struct sfb_bucket *b = &q->bins[slot].bins[0][0]; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci for (i = 0; i < SFB_LEVELS; i++) { 12962306a36Sopenharmony_ci u32 hash = sfbhash & SFB_BUCKET_MASK; 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci sfbhash >>= SFB_BUCKET_SHIFT; 13262306a36Sopenharmony_ci if (b[hash].qlen < 0xFFFF) 13362306a36Sopenharmony_ci b[hash].qlen++; 13462306a36Sopenharmony_ci b += SFB_NUMBUCKETS; /* next level */ 13562306a36Sopenharmony_ci } 13662306a36Sopenharmony_ci} 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_cistatic void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q) 13962306a36Sopenharmony_ci{ 14062306a36Sopenharmony_ci u32 sfbhash; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci sfbhash = cb->hashes[0]; 14362306a36Sopenharmony_ci if (sfbhash) 14462306a36Sopenharmony_ci increment_one_qlen(sfbhash, 0, q); 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci sfbhash = cb->hashes[1]; 14762306a36Sopenharmony_ci if (sfbhash) 14862306a36Sopenharmony_ci increment_one_qlen(sfbhash, 1, q); 14962306a36Sopenharmony_ci} 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_cistatic void decrement_one_qlen(u32 sfbhash, u32 slot, 15262306a36Sopenharmony_ci struct sfb_sched_data *q) 15362306a36Sopenharmony_ci{ 15462306a36Sopenharmony_ci int i; 15562306a36Sopenharmony_ci struct sfb_bucket *b = &q->bins[slot].bins[0][0]; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci for (i = 0; i < SFB_LEVELS; i++) { 15862306a36Sopenharmony_ci u32 hash = sfbhash & SFB_BUCKET_MASK; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci sfbhash >>= SFB_BUCKET_SHIFT; 16162306a36Sopenharmony_ci if (b[hash].qlen > 0) 16262306a36Sopenharmony_ci b[hash].qlen--; 16362306a36Sopenharmony_ci b += SFB_NUMBUCKETS; /* next level */ 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci} 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_cistatic void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) 16862306a36Sopenharmony_ci{ 16962306a36Sopenharmony_ci u32 sfbhash; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci sfbhash = sfb_hash(skb, 0); 17262306a36Sopenharmony_ci if (sfbhash) 17362306a36Sopenharmony_ci decrement_one_qlen(sfbhash, 0, q); 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci sfbhash = sfb_hash(skb, 1); 17662306a36Sopenharmony_ci if (sfbhash) 17762306a36Sopenharmony_ci decrement_one_qlen(sfbhash, 1, q); 17862306a36Sopenharmony_ci} 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_cistatic void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q) 18162306a36Sopenharmony_ci{ 18262306a36Sopenharmony_ci b->p_mark = prob_minus(b->p_mark, q->decrement); 18362306a36Sopenharmony_ci} 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_cistatic void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q) 18662306a36Sopenharmony_ci{ 18762306a36Sopenharmony_ci b->p_mark = prob_plus(b->p_mark, q->increment); 18862306a36Sopenharmony_ci} 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_cistatic void sfb_zero_all_buckets(struct sfb_sched_data *q) 19162306a36Sopenharmony_ci{ 19262306a36Sopenharmony_ci memset(&q->bins, 0, sizeof(q->bins)); 19362306a36Sopenharmony_ci} 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci/* 19662306a36Sopenharmony_ci * compute max qlen, max p_mark, and avg p_mark 19762306a36Sopenharmony_ci */ 19862306a36Sopenharmony_cistatic u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q) 19962306a36Sopenharmony_ci{ 20062306a36Sopenharmony_ci int i; 20162306a36Sopenharmony_ci u32 qlen = 0, prob = 0, totalpm = 0; 20262306a36Sopenharmony_ci const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0]; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) { 20562306a36Sopenharmony_ci if (qlen < b->qlen) 20662306a36Sopenharmony_ci qlen = b->qlen; 20762306a36Sopenharmony_ci totalpm += b->p_mark; 20862306a36Sopenharmony_ci if (prob < b->p_mark) 20962306a36Sopenharmony_ci prob = b->p_mark; 21062306a36Sopenharmony_ci b++; 21162306a36Sopenharmony_ci } 21262306a36Sopenharmony_ci *prob_r = prob; 21362306a36Sopenharmony_ci *avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS); 21462306a36Sopenharmony_ci return qlen; 21562306a36Sopenharmony_ci} 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_cistatic void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) 21962306a36Sopenharmony_ci{ 22062306a36Sopenharmony_ci get_random_bytes(&q->bins[slot].perturbation, 22162306a36Sopenharmony_ci sizeof(q->bins[slot].perturbation)); 22262306a36Sopenharmony_ci} 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_cistatic void sfb_swap_slot(struct sfb_sched_data *q) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci sfb_init_perturbation(q->slot, q); 22762306a36Sopenharmony_ci q->slot ^= 1; 22862306a36Sopenharmony_ci q->double_buffering = false; 22962306a36Sopenharmony_ci} 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci/* Non elastic flows are allowed to use part of the bandwidth, expressed 23262306a36Sopenharmony_ci * in "penalty_rate" packets per second, with "penalty_burst" burst 23362306a36Sopenharmony_ci */ 23462306a36Sopenharmony_cistatic bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q) 23562306a36Sopenharmony_ci{ 23662306a36Sopenharmony_ci if (q->penalty_rate == 0 || q->penalty_burst == 0) 23762306a36Sopenharmony_ci return true; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci if (q->tokens_avail < 1) { 24062306a36Sopenharmony_ci unsigned long age = min(10UL * HZ, jiffies - q->token_time); 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci q->tokens_avail = (age * q->penalty_rate) / HZ; 24362306a36Sopenharmony_ci if (q->tokens_avail > q->penalty_burst) 24462306a36Sopenharmony_ci q->tokens_avail = q->penalty_burst; 24562306a36Sopenharmony_ci q->token_time = jiffies; 24662306a36Sopenharmony_ci if (q->tokens_avail < 1) 24762306a36Sopenharmony_ci return true; 24862306a36Sopenharmony_ci } 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci q->tokens_avail--; 25162306a36Sopenharmony_ci return false; 25262306a36Sopenharmony_ci} 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_cistatic bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, 25562306a36Sopenharmony_ci int *qerr, u32 *salt) 25662306a36Sopenharmony_ci{ 25762306a36Sopenharmony_ci struct tcf_result res; 25862306a36Sopenharmony_ci int result; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci result = tcf_classify(skb, NULL, fl, &res, false); 26162306a36Sopenharmony_ci if (result >= 0) { 26262306a36Sopenharmony_ci#ifdef CONFIG_NET_CLS_ACT 26362306a36Sopenharmony_ci switch (result) { 26462306a36Sopenharmony_ci case TC_ACT_STOLEN: 26562306a36Sopenharmony_ci case TC_ACT_QUEUED: 26662306a36Sopenharmony_ci case TC_ACT_TRAP: 26762306a36Sopenharmony_ci *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 26862306a36Sopenharmony_ci fallthrough; 26962306a36Sopenharmony_ci case TC_ACT_SHOT: 27062306a36Sopenharmony_ci return false; 27162306a36Sopenharmony_ci } 27262306a36Sopenharmony_ci#endif 27362306a36Sopenharmony_ci *salt = TC_H_MIN(res.classid); 27462306a36Sopenharmony_ci return true; 27562306a36Sopenharmony_ci } 27662306a36Sopenharmony_ci return false; 27762306a36Sopenharmony_ci} 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_cistatic int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, 28062306a36Sopenharmony_ci struct sk_buff **to_free) 28162306a36Sopenharmony_ci{ 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 28462306a36Sopenharmony_ci unsigned int len = qdisc_pkt_len(skb); 28562306a36Sopenharmony_ci struct Qdisc *child = q->qdisc; 28662306a36Sopenharmony_ci struct tcf_proto *fl; 28762306a36Sopenharmony_ci struct sfb_skb_cb cb; 28862306a36Sopenharmony_ci int i; 28962306a36Sopenharmony_ci u32 p_min = ~0; 29062306a36Sopenharmony_ci u32 minqlen = ~0; 29162306a36Sopenharmony_ci u32 r, sfbhash; 29262306a36Sopenharmony_ci u32 slot = q->slot; 29362306a36Sopenharmony_ci int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci if (unlikely(sch->q.qlen >= q->limit)) { 29662306a36Sopenharmony_ci qdisc_qstats_overlimit(sch); 29762306a36Sopenharmony_ci q->stats.queuedrop++; 29862306a36Sopenharmony_ci goto drop; 29962306a36Sopenharmony_ci } 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci if (q->rehash_interval > 0) { 30262306a36Sopenharmony_ci unsigned long limit = q->rehash_time + q->rehash_interval; 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci if (unlikely(time_after(jiffies, limit))) { 30562306a36Sopenharmony_ci sfb_swap_slot(q); 30662306a36Sopenharmony_ci q->rehash_time = jiffies; 30762306a36Sopenharmony_ci } else if (unlikely(!q->double_buffering && q->warmup_time > 0 && 30862306a36Sopenharmony_ci time_after(jiffies, limit - q->warmup_time))) { 30962306a36Sopenharmony_ci q->double_buffering = true; 31062306a36Sopenharmony_ci } 31162306a36Sopenharmony_ci } 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci fl = rcu_dereference_bh(q->filter_list); 31462306a36Sopenharmony_ci if (fl) { 31562306a36Sopenharmony_ci u32 salt; 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci /* If using external classifiers, get result and record it. */ 31862306a36Sopenharmony_ci if (!sfb_classify(skb, fl, &ret, &salt)) 31962306a36Sopenharmony_ci goto other_drop; 32062306a36Sopenharmony_ci sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation); 32162306a36Sopenharmony_ci } else { 32262306a36Sopenharmony_ci sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation); 32362306a36Sopenharmony_ci } 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci if (!sfbhash) 32762306a36Sopenharmony_ci sfbhash = 1; 32862306a36Sopenharmony_ci sfb_skb_cb(skb)->hashes[slot] = sfbhash; 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci for (i = 0; i < SFB_LEVELS; i++) { 33162306a36Sopenharmony_ci u32 hash = sfbhash & SFB_BUCKET_MASK; 33262306a36Sopenharmony_ci struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci sfbhash >>= SFB_BUCKET_SHIFT; 33562306a36Sopenharmony_ci if (b->qlen == 0) 33662306a36Sopenharmony_ci decrement_prob(b, q); 33762306a36Sopenharmony_ci else if (b->qlen >= q->bin_size) 33862306a36Sopenharmony_ci increment_prob(b, q); 33962306a36Sopenharmony_ci if (minqlen > b->qlen) 34062306a36Sopenharmony_ci minqlen = b->qlen; 34162306a36Sopenharmony_ci if (p_min > b->p_mark) 34262306a36Sopenharmony_ci p_min = b->p_mark; 34362306a36Sopenharmony_ci } 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci slot ^= 1; 34662306a36Sopenharmony_ci sfb_skb_cb(skb)->hashes[slot] = 0; 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci if (unlikely(minqlen >= q->max)) { 34962306a36Sopenharmony_ci qdisc_qstats_overlimit(sch); 35062306a36Sopenharmony_ci q->stats.bucketdrop++; 35162306a36Sopenharmony_ci goto drop; 35262306a36Sopenharmony_ci } 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci if (unlikely(p_min >= SFB_MAX_PROB)) { 35562306a36Sopenharmony_ci /* Inelastic flow */ 35662306a36Sopenharmony_ci if (q->double_buffering) { 35762306a36Sopenharmony_ci sfbhash = skb_get_hash_perturb(skb, 35862306a36Sopenharmony_ci &q->bins[slot].perturbation); 35962306a36Sopenharmony_ci if (!sfbhash) 36062306a36Sopenharmony_ci sfbhash = 1; 36162306a36Sopenharmony_ci sfb_skb_cb(skb)->hashes[slot] = sfbhash; 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci for (i = 0; i < SFB_LEVELS; i++) { 36462306a36Sopenharmony_ci u32 hash = sfbhash & SFB_BUCKET_MASK; 36562306a36Sopenharmony_ci struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci sfbhash >>= SFB_BUCKET_SHIFT; 36862306a36Sopenharmony_ci if (b->qlen == 0) 36962306a36Sopenharmony_ci decrement_prob(b, q); 37062306a36Sopenharmony_ci else if (b->qlen >= q->bin_size) 37162306a36Sopenharmony_ci increment_prob(b, q); 37262306a36Sopenharmony_ci } 37362306a36Sopenharmony_ci } 37462306a36Sopenharmony_ci if (sfb_rate_limit(skb, q)) { 37562306a36Sopenharmony_ci qdisc_qstats_overlimit(sch); 37662306a36Sopenharmony_ci q->stats.penaltydrop++; 37762306a36Sopenharmony_ci goto drop; 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci goto enqueue; 38062306a36Sopenharmony_ci } 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci r = get_random_u16() & SFB_MAX_PROB; 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci if (unlikely(r < p_min)) { 38562306a36Sopenharmony_ci if (unlikely(p_min > SFB_MAX_PROB / 2)) { 38662306a36Sopenharmony_ci /* If we're marking that many packets, then either 38762306a36Sopenharmony_ci * this flow is unresponsive, or we're badly congested. 38862306a36Sopenharmony_ci * In either case, we want to start dropping packets. 38962306a36Sopenharmony_ci */ 39062306a36Sopenharmony_ci if (r < (p_min - SFB_MAX_PROB / 2) * 2) { 39162306a36Sopenharmony_ci q->stats.earlydrop++; 39262306a36Sopenharmony_ci goto drop; 39362306a36Sopenharmony_ci } 39462306a36Sopenharmony_ci } 39562306a36Sopenharmony_ci if (INET_ECN_set_ce(skb)) { 39662306a36Sopenharmony_ci q->stats.marked++; 39762306a36Sopenharmony_ci } else { 39862306a36Sopenharmony_ci q->stats.earlydrop++; 39962306a36Sopenharmony_ci goto drop; 40062306a36Sopenharmony_ci } 40162306a36Sopenharmony_ci } 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_cienqueue: 40462306a36Sopenharmony_ci memcpy(&cb, sfb_skb_cb(skb), sizeof(cb)); 40562306a36Sopenharmony_ci ret = qdisc_enqueue(skb, child, to_free); 40662306a36Sopenharmony_ci if (likely(ret == NET_XMIT_SUCCESS)) { 40762306a36Sopenharmony_ci sch->qstats.backlog += len; 40862306a36Sopenharmony_ci sch->q.qlen++; 40962306a36Sopenharmony_ci increment_qlen(&cb, q); 41062306a36Sopenharmony_ci } else if (net_xmit_drop_count(ret)) { 41162306a36Sopenharmony_ci q->stats.childdrop++; 41262306a36Sopenharmony_ci qdisc_qstats_drop(sch); 41362306a36Sopenharmony_ci } 41462306a36Sopenharmony_ci return ret; 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_cidrop: 41762306a36Sopenharmony_ci qdisc_drop(skb, sch, to_free); 41862306a36Sopenharmony_ci return NET_XMIT_CN; 41962306a36Sopenharmony_ciother_drop: 42062306a36Sopenharmony_ci if (ret & __NET_XMIT_BYPASS) 42162306a36Sopenharmony_ci qdisc_qstats_drop(sch); 42262306a36Sopenharmony_ci kfree_skb(skb); 42362306a36Sopenharmony_ci return ret; 42462306a36Sopenharmony_ci} 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_cistatic struct sk_buff *sfb_dequeue(struct Qdisc *sch) 42762306a36Sopenharmony_ci{ 42862306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 42962306a36Sopenharmony_ci struct Qdisc *child = q->qdisc; 43062306a36Sopenharmony_ci struct sk_buff *skb; 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci skb = child->dequeue(q->qdisc); 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci if (skb) { 43562306a36Sopenharmony_ci qdisc_bstats_update(sch, skb); 43662306a36Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 43762306a36Sopenharmony_ci sch->q.qlen--; 43862306a36Sopenharmony_ci decrement_qlen(skb, q); 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci return skb; 44262306a36Sopenharmony_ci} 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_cistatic struct sk_buff *sfb_peek(struct Qdisc *sch) 44562306a36Sopenharmony_ci{ 44662306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 44762306a36Sopenharmony_ci struct Qdisc *child = q->qdisc; 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci return child->ops->peek(child); 45062306a36Sopenharmony_ci} 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci/* No sfb_drop -- impossible since the child doesn't return the dropped skb. */ 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_cistatic void sfb_reset(struct Qdisc *sch) 45562306a36Sopenharmony_ci{ 45662306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci if (likely(q->qdisc)) 45962306a36Sopenharmony_ci qdisc_reset(q->qdisc); 46062306a36Sopenharmony_ci q->slot = 0; 46162306a36Sopenharmony_ci q->double_buffering = false; 46262306a36Sopenharmony_ci sfb_zero_all_buckets(q); 46362306a36Sopenharmony_ci sfb_init_perturbation(0, q); 46462306a36Sopenharmony_ci} 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_cistatic void sfb_destroy(struct Qdisc *sch) 46762306a36Sopenharmony_ci{ 46862306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci tcf_block_put(q->block); 47162306a36Sopenharmony_ci qdisc_put(q->qdisc); 47262306a36Sopenharmony_ci} 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_cistatic const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = { 47562306a36Sopenharmony_ci [TCA_SFB_PARMS] = { .len = sizeof(struct tc_sfb_qopt) }, 47662306a36Sopenharmony_ci}; 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_cistatic const struct tc_sfb_qopt sfb_default_ops = { 47962306a36Sopenharmony_ci .rehash_interval = 600 * MSEC_PER_SEC, 48062306a36Sopenharmony_ci .warmup_time = 60 * MSEC_PER_SEC, 48162306a36Sopenharmony_ci .limit = 0, 48262306a36Sopenharmony_ci .max = 25, 48362306a36Sopenharmony_ci .bin_size = 20, 48462306a36Sopenharmony_ci .increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */ 48562306a36Sopenharmony_ci .decrement = (SFB_MAX_PROB + 3000) / 6000, 48662306a36Sopenharmony_ci .penalty_rate = 10, 48762306a36Sopenharmony_ci .penalty_burst = 20, 48862306a36Sopenharmony_ci}; 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_cistatic int sfb_change(struct Qdisc *sch, struct nlattr *opt, 49162306a36Sopenharmony_ci struct netlink_ext_ack *extack) 49262306a36Sopenharmony_ci{ 49362306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 49462306a36Sopenharmony_ci struct Qdisc *child, *old; 49562306a36Sopenharmony_ci struct nlattr *tb[TCA_SFB_MAX + 1]; 49662306a36Sopenharmony_ci const struct tc_sfb_qopt *ctl = &sfb_default_ops; 49762306a36Sopenharmony_ci u32 limit; 49862306a36Sopenharmony_ci int err; 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci if (opt) { 50162306a36Sopenharmony_ci err = nla_parse_nested_deprecated(tb, TCA_SFB_MAX, opt, 50262306a36Sopenharmony_ci sfb_policy, NULL); 50362306a36Sopenharmony_ci if (err < 0) 50462306a36Sopenharmony_ci return -EINVAL; 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci if (tb[TCA_SFB_PARMS] == NULL) 50762306a36Sopenharmony_ci return -EINVAL; 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_ci ctl = nla_data(tb[TCA_SFB_PARMS]); 51062306a36Sopenharmony_ci } 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci limit = ctl->limit; 51362306a36Sopenharmony_ci if (limit == 0) 51462306a36Sopenharmony_ci limit = qdisc_dev(sch)->tx_queue_len; 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit, extack); 51762306a36Sopenharmony_ci if (IS_ERR(child)) 51862306a36Sopenharmony_ci return PTR_ERR(child); 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci if (child != &noop_qdisc) 52162306a36Sopenharmony_ci qdisc_hash_add(child, true); 52262306a36Sopenharmony_ci sch_tree_lock(sch); 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci qdisc_purge_queue(q->qdisc); 52562306a36Sopenharmony_ci old = q->qdisc; 52662306a36Sopenharmony_ci q->qdisc = child; 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval); 52962306a36Sopenharmony_ci q->warmup_time = msecs_to_jiffies(ctl->warmup_time); 53062306a36Sopenharmony_ci q->rehash_time = jiffies; 53162306a36Sopenharmony_ci q->limit = limit; 53262306a36Sopenharmony_ci q->increment = ctl->increment; 53362306a36Sopenharmony_ci q->decrement = ctl->decrement; 53462306a36Sopenharmony_ci q->max = ctl->max; 53562306a36Sopenharmony_ci q->bin_size = ctl->bin_size; 53662306a36Sopenharmony_ci q->penalty_rate = ctl->penalty_rate; 53762306a36Sopenharmony_ci q->penalty_burst = ctl->penalty_burst; 53862306a36Sopenharmony_ci q->tokens_avail = ctl->penalty_burst; 53962306a36Sopenharmony_ci q->token_time = jiffies; 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci q->slot = 0; 54262306a36Sopenharmony_ci q->double_buffering = false; 54362306a36Sopenharmony_ci sfb_zero_all_buckets(q); 54462306a36Sopenharmony_ci sfb_init_perturbation(0, q); 54562306a36Sopenharmony_ci sfb_init_perturbation(1, q); 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci sch_tree_unlock(sch); 54862306a36Sopenharmony_ci qdisc_put(old); 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci return 0; 55162306a36Sopenharmony_ci} 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_cistatic int sfb_init(struct Qdisc *sch, struct nlattr *opt, 55462306a36Sopenharmony_ci struct netlink_ext_ack *extack) 55562306a36Sopenharmony_ci{ 55662306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 55762306a36Sopenharmony_ci int err; 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ci err = tcf_block_get(&q->block, &q->filter_list, sch, extack); 56062306a36Sopenharmony_ci if (err) 56162306a36Sopenharmony_ci return err; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci q->qdisc = &noop_qdisc; 56462306a36Sopenharmony_ci return sfb_change(sch, opt, extack); 56562306a36Sopenharmony_ci} 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_cistatic int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) 56862306a36Sopenharmony_ci{ 56962306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 57062306a36Sopenharmony_ci struct nlattr *opts; 57162306a36Sopenharmony_ci struct tc_sfb_qopt opt = { 57262306a36Sopenharmony_ci .rehash_interval = jiffies_to_msecs(q->rehash_interval), 57362306a36Sopenharmony_ci .warmup_time = jiffies_to_msecs(q->warmup_time), 57462306a36Sopenharmony_ci .limit = q->limit, 57562306a36Sopenharmony_ci .max = q->max, 57662306a36Sopenharmony_ci .bin_size = q->bin_size, 57762306a36Sopenharmony_ci .increment = q->increment, 57862306a36Sopenharmony_ci .decrement = q->decrement, 57962306a36Sopenharmony_ci .penalty_rate = q->penalty_rate, 58062306a36Sopenharmony_ci .penalty_burst = q->penalty_burst, 58162306a36Sopenharmony_ci }; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci sch->qstats.backlog = q->qdisc->qstats.backlog; 58462306a36Sopenharmony_ci opts = nla_nest_start_noflag(skb, TCA_OPTIONS); 58562306a36Sopenharmony_ci if (opts == NULL) 58662306a36Sopenharmony_ci goto nla_put_failure; 58762306a36Sopenharmony_ci if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt)) 58862306a36Sopenharmony_ci goto nla_put_failure; 58962306a36Sopenharmony_ci return nla_nest_end(skb, opts); 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_cinla_put_failure: 59262306a36Sopenharmony_ci nla_nest_cancel(skb, opts); 59362306a36Sopenharmony_ci return -EMSGSIZE; 59462306a36Sopenharmony_ci} 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_cistatic int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 59762306a36Sopenharmony_ci{ 59862306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 59962306a36Sopenharmony_ci struct tc_sfb_xstats st = { 60062306a36Sopenharmony_ci .earlydrop = q->stats.earlydrop, 60162306a36Sopenharmony_ci .penaltydrop = q->stats.penaltydrop, 60262306a36Sopenharmony_ci .bucketdrop = q->stats.bucketdrop, 60362306a36Sopenharmony_ci .queuedrop = q->stats.queuedrop, 60462306a36Sopenharmony_ci .childdrop = q->stats.childdrop, 60562306a36Sopenharmony_ci .marked = q->stats.marked, 60662306a36Sopenharmony_ci }; 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q); 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_ci return gnet_stats_copy_app(d, &st, sizeof(st)); 61162306a36Sopenharmony_ci} 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_cistatic int sfb_dump_class(struct Qdisc *sch, unsigned long cl, 61462306a36Sopenharmony_ci struct sk_buff *skb, struct tcmsg *tcm) 61562306a36Sopenharmony_ci{ 61662306a36Sopenharmony_ci return -ENOSYS; 61762306a36Sopenharmony_ci} 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_cistatic int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 62062306a36Sopenharmony_ci struct Qdisc **old, struct netlink_ext_ack *extack) 62162306a36Sopenharmony_ci{ 62262306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci if (new == NULL) 62562306a36Sopenharmony_ci new = &noop_qdisc; 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci *old = qdisc_replace(sch, new, &q->qdisc); 62862306a36Sopenharmony_ci return 0; 62962306a36Sopenharmony_ci} 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_cistatic struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg) 63262306a36Sopenharmony_ci{ 63362306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci return q->qdisc; 63662306a36Sopenharmony_ci} 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_cistatic unsigned long sfb_find(struct Qdisc *sch, u32 classid) 63962306a36Sopenharmony_ci{ 64062306a36Sopenharmony_ci return 1; 64162306a36Sopenharmony_ci} 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_cistatic void sfb_unbind(struct Qdisc *sch, unsigned long arg) 64462306a36Sopenharmony_ci{ 64562306a36Sopenharmony_ci} 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_cistatic int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 64862306a36Sopenharmony_ci struct nlattr **tca, unsigned long *arg, 64962306a36Sopenharmony_ci struct netlink_ext_ack *extack) 65062306a36Sopenharmony_ci{ 65162306a36Sopenharmony_ci return -ENOSYS; 65262306a36Sopenharmony_ci} 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_cistatic int sfb_delete(struct Qdisc *sch, unsigned long cl, 65562306a36Sopenharmony_ci struct netlink_ext_ack *extack) 65662306a36Sopenharmony_ci{ 65762306a36Sopenharmony_ci return -ENOSYS; 65862306a36Sopenharmony_ci} 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_cistatic void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) 66162306a36Sopenharmony_ci{ 66262306a36Sopenharmony_ci if (!walker->stop) { 66362306a36Sopenharmony_ci tc_qdisc_stats_dump(sch, 1, walker); 66462306a36Sopenharmony_ci } 66562306a36Sopenharmony_ci} 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_cistatic struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl, 66862306a36Sopenharmony_ci struct netlink_ext_ack *extack) 66962306a36Sopenharmony_ci{ 67062306a36Sopenharmony_ci struct sfb_sched_data *q = qdisc_priv(sch); 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci if (cl) 67362306a36Sopenharmony_ci return NULL; 67462306a36Sopenharmony_ci return q->block; 67562306a36Sopenharmony_ci} 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_cistatic unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent, 67862306a36Sopenharmony_ci u32 classid) 67962306a36Sopenharmony_ci{ 68062306a36Sopenharmony_ci return 0; 68162306a36Sopenharmony_ci} 68262306a36Sopenharmony_ci 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_cistatic const struct Qdisc_class_ops sfb_class_ops = { 68562306a36Sopenharmony_ci .graft = sfb_graft, 68662306a36Sopenharmony_ci .leaf = sfb_leaf, 68762306a36Sopenharmony_ci .find = sfb_find, 68862306a36Sopenharmony_ci .change = sfb_change_class, 68962306a36Sopenharmony_ci .delete = sfb_delete, 69062306a36Sopenharmony_ci .walk = sfb_walk, 69162306a36Sopenharmony_ci .tcf_block = sfb_tcf_block, 69262306a36Sopenharmony_ci .bind_tcf = sfb_bind, 69362306a36Sopenharmony_ci .unbind_tcf = sfb_unbind, 69462306a36Sopenharmony_ci .dump = sfb_dump_class, 69562306a36Sopenharmony_ci}; 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_cistatic struct Qdisc_ops sfb_qdisc_ops __read_mostly = { 69862306a36Sopenharmony_ci .id = "sfb", 69962306a36Sopenharmony_ci .priv_size = sizeof(struct sfb_sched_data), 70062306a36Sopenharmony_ci .cl_ops = &sfb_class_ops, 70162306a36Sopenharmony_ci .enqueue = sfb_enqueue, 70262306a36Sopenharmony_ci .dequeue = sfb_dequeue, 70362306a36Sopenharmony_ci .peek = sfb_peek, 70462306a36Sopenharmony_ci .init = sfb_init, 70562306a36Sopenharmony_ci .reset = sfb_reset, 70662306a36Sopenharmony_ci .destroy = sfb_destroy, 70762306a36Sopenharmony_ci .change = sfb_change, 70862306a36Sopenharmony_ci .dump = sfb_dump, 70962306a36Sopenharmony_ci .dump_stats = sfb_dump_stats, 71062306a36Sopenharmony_ci .owner = THIS_MODULE, 71162306a36Sopenharmony_ci}; 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_cistatic int __init sfb_module_init(void) 71462306a36Sopenharmony_ci{ 71562306a36Sopenharmony_ci return register_qdisc(&sfb_qdisc_ops); 71662306a36Sopenharmony_ci} 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_cistatic void __exit sfb_module_exit(void) 71962306a36Sopenharmony_ci{ 72062306a36Sopenharmony_ci unregister_qdisc(&sfb_qdisc_ops); 72162306a36Sopenharmony_ci} 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_cimodule_init(sfb_module_init) 72462306a36Sopenharmony_cimodule_exit(sfb_module_exit) 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ciMODULE_DESCRIPTION("Stochastic Fair Blue queue discipline"); 72762306a36Sopenharmony_ciMODULE_AUTHOR("Juliusz Chroboczek"); 72862306a36Sopenharmony_ciMODULE_AUTHOR("Eric Dumazet"); 72962306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 730