162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* Flow Queue PIE discipline 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Copyright (C) 2019 Mohit P. Tahiliani <tahiliani@nitk.edu.in> 562306a36Sopenharmony_ci * Copyright (C) 2019 Sachin D. Patil <sdp.sachin@gmail.com> 662306a36Sopenharmony_ci * Copyright (C) 2019 V. Saicharan <vsaicharan1998@gmail.com> 762306a36Sopenharmony_ci * Copyright (C) 2019 Mohit Bhasi <mohitbhasi1998@gmail.com> 862306a36Sopenharmony_ci * Copyright (C) 2019 Leslie Monis <lesliemonis@gmail.com> 962306a36Sopenharmony_ci * Copyright (C) 2019 Gautam Ramakrishnan <gautamramk@gmail.com> 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include <linux/jhash.h> 1362306a36Sopenharmony_ci#include <linux/sizes.h> 1462306a36Sopenharmony_ci#include <linux/vmalloc.h> 1562306a36Sopenharmony_ci#include <net/pkt_cls.h> 1662306a36Sopenharmony_ci#include <net/pie.h> 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci/* Flow Queue PIE 1962306a36Sopenharmony_ci * 2062306a36Sopenharmony_ci * Principles: 2162306a36Sopenharmony_ci * - Packets are classified on flows. 2262306a36Sopenharmony_ci * - This is a Stochastic model (as we use a hash, several flows might 2362306a36Sopenharmony_ci * be hashed to the same slot) 2462306a36Sopenharmony_ci * - Each flow has a PIE managed queue. 2562306a36Sopenharmony_ci * - Flows are linked onto two (Round Robin) lists, 2662306a36Sopenharmony_ci * so that new flows have priority on old ones. 2762306a36Sopenharmony_ci * - For a given flow, packets are not reordered. 2862306a36Sopenharmony_ci * - Drops during enqueue only. 2962306a36Sopenharmony_ci * - ECN capability is off by default. 3062306a36Sopenharmony_ci * - ECN threshold (if ECN is enabled) is at 10% by default. 3162306a36Sopenharmony_ci * - Uses timestamps to calculate queue delay by default. 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci/** 3562306a36Sopenharmony_ci * struct fq_pie_flow - contains data for each flow 3662306a36Sopenharmony_ci * @vars: pie vars associated with the flow 3762306a36Sopenharmony_ci * @deficit: number of remaining byte credits 3862306a36Sopenharmony_ci * @backlog: size of data in the flow 3962306a36Sopenharmony_ci * @qlen: number of packets in the flow 4062306a36Sopenharmony_ci * @flowchain: flowchain for the flow 4162306a36Sopenharmony_ci * @head: first packet in the flow 4262306a36Sopenharmony_ci * @tail: last packet in the flow 4362306a36Sopenharmony_ci */ 4462306a36Sopenharmony_cistruct fq_pie_flow { 4562306a36Sopenharmony_ci struct pie_vars vars; 4662306a36Sopenharmony_ci s32 deficit; 4762306a36Sopenharmony_ci u32 backlog; 4862306a36Sopenharmony_ci u32 qlen; 4962306a36Sopenharmony_ci struct list_head flowchain; 5062306a36Sopenharmony_ci struct sk_buff *head; 5162306a36Sopenharmony_ci struct sk_buff *tail; 5262306a36Sopenharmony_ci}; 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_cistruct fq_pie_sched_data { 5562306a36Sopenharmony_ci struct tcf_proto __rcu *filter_list; /* optional external classifier */ 5662306a36Sopenharmony_ci struct tcf_block *block; 5762306a36Sopenharmony_ci struct fq_pie_flow *flows; 5862306a36Sopenharmony_ci struct Qdisc *sch; 5962306a36Sopenharmony_ci struct list_head old_flows; 6062306a36Sopenharmony_ci struct list_head new_flows; 6162306a36Sopenharmony_ci struct pie_params p_params; 6262306a36Sopenharmony_ci u32 ecn_prob; 6362306a36Sopenharmony_ci u32 flows_cnt; 6462306a36Sopenharmony_ci u32 flows_cursor; 6562306a36Sopenharmony_ci u32 quantum; 6662306a36Sopenharmony_ci u32 memory_limit; 6762306a36Sopenharmony_ci u32 new_flow_count; 6862306a36Sopenharmony_ci u32 memory_usage; 6962306a36Sopenharmony_ci u32 overmemory; 7062306a36Sopenharmony_ci struct pie_stats stats; 7162306a36Sopenharmony_ci struct timer_list adapt_timer; 7262306a36Sopenharmony_ci}; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_cistatic unsigned int fq_pie_hash(const struct fq_pie_sched_data *q, 7562306a36Sopenharmony_ci struct sk_buff *skb) 7662306a36Sopenharmony_ci{ 7762306a36Sopenharmony_ci return reciprocal_scale(skb_get_hash(skb), q->flows_cnt); 7862306a36Sopenharmony_ci} 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_cistatic unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch, 8162306a36Sopenharmony_ci int *qerr) 8262306a36Sopenharmony_ci{ 8362306a36Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 8462306a36Sopenharmony_ci struct tcf_proto *filter; 8562306a36Sopenharmony_ci struct tcf_result res; 8662306a36Sopenharmony_ci int result; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci if (TC_H_MAJ(skb->priority) == sch->handle && 8962306a36Sopenharmony_ci TC_H_MIN(skb->priority) > 0 && 9062306a36Sopenharmony_ci TC_H_MIN(skb->priority) <= q->flows_cnt) 9162306a36Sopenharmony_ci return TC_H_MIN(skb->priority); 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci filter = rcu_dereference_bh(q->filter_list); 9462306a36Sopenharmony_ci if (!filter) 9562306a36Sopenharmony_ci return fq_pie_hash(q, skb) + 1; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 9862306a36Sopenharmony_ci result = tcf_classify(skb, NULL, filter, &res, false); 9962306a36Sopenharmony_ci if (result >= 0) { 10062306a36Sopenharmony_ci#ifdef CONFIG_NET_CLS_ACT 10162306a36Sopenharmony_ci switch (result) { 10262306a36Sopenharmony_ci case TC_ACT_STOLEN: 10362306a36Sopenharmony_ci case TC_ACT_QUEUED: 10462306a36Sopenharmony_ci case TC_ACT_TRAP: 10562306a36Sopenharmony_ci *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 10662306a36Sopenharmony_ci fallthrough; 10762306a36Sopenharmony_ci case TC_ACT_SHOT: 10862306a36Sopenharmony_ci return 0; 10962306a36Sopenharmony_ci } 11062306a36Sopenharmony_ci#endif 11162306a36Sopenharmony_ci if (TC_H_MIN(res.classid) <= q->flows_cnt) 11262306a36Sopenharmony_ci return TC_H_MIN(res.classid); 11362306a36Sopenharmony_ci } 11462306a36Sopenharmony_ci return 0; 11562306a36Sopenharmony_ci} 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci/* add skb to flow queue (tail add) */ 11862306a36Sopenharmony_cistatic inline void flow_queue_add(struct fq_pie_flow *flow, 11962306a36Sopenharmony_ci struct sk_buff *skb) 12062306a36Sopenharmony_ci{ 12162306a36Sopenharmony_ci if (!flow->head) 12262306a36Sopenharmony_ci flow->head = skb; 12362306a36Sopenharmony_ci else 12462306a36Sopenharmony_ci flow->tail->next = skb; 12562306a36Sopenharmony_ci flow->tail = skb; 12662306a36Sopenharmony_ci skb->next = NULL; 12762306a36Sopenharmony_ci} 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_cistatic int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, 13062306a36Sopenharmony_ci struct sk_buff **to_free) 13162306a36Sopenharmony_ci{ 13262306a36Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 13362306a36Sopenharmony_ci struct fq_pie_flow *sel_flow; 13462306a36Sopenharmony_ci int ret; 13562306a36Sopenharmony_ci u8 memory_limited = false; 13662306a36Sopenharmony_ci u8 enqueue = false; 13762306a36Sopenharmony_ci u32 pkt_len; 13862306a36Sopenharmony_ci u32 idx; 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci /* Classifies packet into corresponding flow */ 14162306a36Sopenharmony_ci idx = fq_pie_classify(skb, sch, &ret); 14262306a36Sopenharmony_ci if (idx == 0) { 14362306a36Sopenharmony_ci if (ret & __NET_XMIT_BYPASS) 14462306a36Sopenharmony_ci qdisc_qstats_drop(sch); 14562306a36Sopenharmony_ci __qdisc_drop(skb, to_free); 14662306a36Sopenharmony_ci return ret; 14762306a36Sopenharmony_ci } 14862306a36Sopenharmony_ci idx--; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci sel_flow = &q->flows[idx]; 15162306a36Sopenharmony_ci /* Checks whether adding a new packet would exceed memory limit */ 15262306a36Sopenharmony_ci get_pie_cb(skb)->mem_usage = skb->truesize; 15362306a36Sopenharmony_ci memory_limited = q->memory_usage > q->memory_limit + skb->truesize; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci /* Checks if the qdisc is full */ 15662306a36Sopenharmony_ci if (unlikely(qdisc_qlen(sch) >= sch->limit)) { 15762306a36Sopenharmony_ci q->stats.overlimit++; 15862306a36Sopenharmony_ci goto out; 15962306a36Sopenharmony_ci } else if (unlikely(memory_limited)) { 16062306a36Sopenharmony_ci q->overmemory++; 16162306a36Sopenharmony_ci } 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars, 16462306a36Sopenharmony_ci sel_flow->backlog, skb->len)) { 16562306a36Sopenharmony_ci enqueue = true; 16662306a36Sopenharmony_ci } else if (q->p_params.ecn && 16762306a36Sopenharmony_ci sel_flow->vars.prob <= (MAX_PROB / 100) * q->ecn_prob && 16862306a36Sopenharmony_ci INET_ECN_set_ce(skb)) { 16962306a36Sopenharmony_ci /* If packet is ecn capable, mark it if drop probability 17062306a36Sopenharmony_ci * is lower than the parameter ecn_prob, else drop it. 17162306a36Sopenharmony_ci */ 17262306a36Sopenharmony_ci q->stats.ecn_mark++; 17362306a36Sopenharmony_ci enqueue = true; 17462306a36Sopenharmony_ci } 17562306a36Sopenharmony_ci if (enqueue) { 17662306a36Sopenharmony_ci /* Set enqueue time only when dq_rate_estimator is disabled. */ 17762306a36Sopenharmony_ci if (!q->p_params.dq_rate_estimator) 17862306a36Sopenharmony_ci pie_set_enqueue_time(skb); 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci pkt_len = qdisc_pkt_len(skb); 18162306a36Sopenharmony_ci q->stats.packets_in++; 18262306a36Sopenharmony_ci q->memory_usage += skb->truesize; 18362306a36Sopenharmony_ci sch->qstats.backlog += pkt_len; 18462306a36Sopenharmony_ci sch->q.qlen++; 18562306a36Sopenharmony_ci flow_queue_add(sel_flow, skb); 18662306a36Sopenharmony_ci if (list_empty(&sel_flow->flowchain)) { 18762306a36Sopenharmony_ci list_add_tail(&sel_flow->flowchain, &q->new_flows); 18862306a36Sopenharmony_ci q->new_flow_count++; 18962306a36Sopenharmony_ci sel_flow->deficit = q->quantum; 19062306a36Sopenharmony_ci sel_flow->qlen = 0; 19162306a36Sopenharmony_ci sel_flow->backlog = 0; 19262306a36Sopenharmony_ci } 19362306a36Sopenharmony_ci sel_flow->qlen++; 19462306a36Sopenharmony_ci sel_flow->backlog += pkt_len; 19562306a36Sopenharmony_ci return NET_XMIT_SUCCESS; 19662306a36Sopenharmony_ci } 19762306a36Sopenharmony_ciout: 19862306a36Sopenharmony_ci q->stats.dropped++; 19962306a36Sopenharmony_ci sel_flow->vars.accu_prob = 0; 20062306a36Sopenharmony_ci __qdisc_drop(skb, to_free); 20162306a36Sopenharmony_ci qdisc_qstats_drop(sch); 20262306a36Sopenharmony_ci return NET_XMIT_CN; 20362306a36Sopenharmony_ci} 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_cistatic struct netlink_range_validation fq_pie_q_range = { 20662306a36Sopenharmony_ci .min = 1, 20762306a36Sopenharmony_ci .max = 1 << 20, 20862306a36Sopenharmony_ci}; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_cistatic const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = { 21162306a36Sopenharmony_ci [TCA_FQ_PIE_LIMIT] = {.type = NLA_U32}, 21262306a36Sopenharmony_ci [TCA_FQ_PIE_FLOWS] = {.type = NLA_U32}, 21362306a36Sopenharmony_ci [TCA_FQ_PIE_TARGET] = {.type = NLA_U32}, 21462306a36Sopenharmony_ci [TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32}, 21562306a36Sopenharmony_ci [TCA_FQ_PIE_ALPHA] = {.type = NLA_U32}, 21662306a36Sopenharmony_ci [TCA_FQ_PIE_BETA] = {.type = NLA_U32}, 21762306a36Sopenharmony_ci [TCA_FQ_PIE_QUANTUM] = 21862306a36Sopenharmony_ci NLA_POLICY_FULL_RANGE(NLA_U32, &fq_pie_q_range), 21962306a36Sopenharmony_ci [TCA_FQ_PIE_MEMORY_LIMIT] = {.type = NLA_U32}, 22062306a36Sopenharmony_ci [TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32}, 22162306a36Sopenharmony_ci [TCA_FQ_PIE_ECN] = {.type = NLA_U32}, 22262306a36Sopenharmony_ci [TCA_FQ_PIE_BYTEMODE] = {.type = NLA_U32}, 22362306a36Sopenharmony_ci [TCA_FQ_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32}, 22462306a36Sopenharmony_ci}; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_cistatic inline struct sk_buff *dequeue_head(struct fq_pie_flow *flow) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci struct sk_buff *skb = flow->head; 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci flow->head = skb->next; 23162306a36Sopenharmony_ci skb->next = NULL; 23262306a36Sopenharmony_ci return skb; 23362306a36Sopenharmony_ci} 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_cistatic struct sk_buff *fq_pie_qdisc_dequeue(struct Qdisc *sch) 23662306a36Sopenharmony_ci{ 23762306a36Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 23862306a36Sopenharmony_ci struct sk_buff *skb = NULL; 23962306a36Sopenharmony_ci struct fq_pie_flow *flow; 24062306a36Sopenharmony_ci struct list_head *head; 24162306a36Sopenharmony_ci u32 pkt_len; 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_cibegin: 24462306a36Sopenharmony_ci head = &q->new_flows; 24562306a36Sopenharmony_ci if (list_empty(head)) { 24662306a36Sopenharmony_ci head = &q->old_flows; 24762306a36Sopenharmony_ci if (list_empty(head)) 24862306a36Sopenharmony_ci return NULL; 24962306a36Sopenharmony_ci } 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci flow = list_first_entry(head, struct fq_pie_flow, flowchain); 25262306a36Sopenharmony_ci /* Flow has exhausted all its credits */ 25362306a36Sopenharmony_ci if (flow->deficit <= 0) { 25462306a36Sopenharmony_ci flow->deficit += q->quantum; 25562306a36Sopenharmony_ci list_move_tail(&flow->flowchain, &q->old_flows); 25662306a36Sopenharmony_ci goto begin; 25762306a36Sopenharmony_ci } 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci if (flow->head) { 26062306a36Sopenharmony_ci skb = dequeue_head(flow); 26162306a36Sopenharmony_ci pkt_len = qdisc_pkt_len(skb); 26262306a36Sopenharmony_ci sch->qstats.backlog -= pkt_len; 26362306a36Sopenharmony_ci sch->q.qlen--; 26462306a36Sopenharmony_ci qdisc_bstats_update(sch, skb); 26562306a36Sopenharmony_ci } 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci if (!skb) { 26862306a36Sopenharmony_ci /* force a pass through old_flows to prevent starvation */ 26962306a36Sopenharmony_ci if (head == &q->new_flows && !list_empty(&q->old_flows)) 27062306a36Sopenharmony_ci list_move_tail(&flow->flowchain, &q->old_flows); 27162306a36Sopenharmony_ci else 27262306a36Sopenharmony_ci list_del_init(&flow->flowchain); 27362306a36Sopenharmony_ci goto begin; 27462306a36Sopenharmony_ci } 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci flow->qlen--; 27762306a36Sopenharmony_ci flow->deficit -= pkt_len; 27862306a36Sopenharmony_ci flow->backlog -= pkt_len; 27962306a36Sopenharmony_ci q->memory_usage -= get_pie_cb(skb)->mem_usage; 28062306a36Sopenharmony_ci pie_process_dequeue(skb, &q->p_params, &flow->vars, flow->backlog); 28162306a36Sopenharmony_ci return skb; 28262306a36Sopenharmony_ci} 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_cistatic int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, 28562306a36Sopenharmony_ci struct netlink_ext_ack *extack) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 28862306a36Sopenharmony_ci struct nlattr *tb[TCA_FQ_PIE_MAX + 1]; 28962306a36Sopenharmony_ci unsigned int len_dropped = 0; 29062306a36Sopenharmony_ci unsigned int num_dropped = 0; 29162306a36Sopenharmony_ci int err; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack); 29462306a36Sopenharmony_ci if (err < 0) 29562306a36Sopenharmony_ci return err; 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci sch_tree_lock(sch); 29862306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_LIMIT]) { 29962306a36Sopenharmony_ci u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]); 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci q->p_params.limit = limit; 30262306a36Sopenharmony_ci sch->limit = limit; 30362306a36Sopenharmony_ci } 30462306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_FLOWS]) { 30562306a36Sopenharmony_ci if (q->flows) { 30662306a36Sopenharmony_ci NL_SET_ERR_MSG_MOD(extack, 30762306a36Sopenharmony_ci "Number of flows cannot be changed"); 30862306a36Sopenharmony_ci goto flow_error; 30962306a36Sopenharmony_ci } 31062306a36Sopenharmony_ci q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]); 31162306a36Sopenharmony_ci if (!q->flows_cnt || q->flows_cnt > 65536) { 31262306a36Sopenharmony_ci NL_SET_ERR_MSG_MOD(extack, 31362306a36Sopenharmony_ci "Number of flows must range in [1..65536]"); 31462306a36Sopenharmony_ci goto flow_error; 31562306a36Sopenharmony_ci } 31662306a36Sopenharmony_ci } 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci /* convert from microseconds to pschedtime */ 31962306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_TARGET]) { 32062306a36Sopenharmony_ci /* target is in us */ 32162306a36Sopenharmony_ci u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]); 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci /* convert to pschedtime */ 32462306a36Sopenharmony_ci q->p_params.target = 32562306a36Sopenharmony_ci PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC); 32662306a36Sopenharmony_ci } 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci /* tupdate is in jiffies */ 32962306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_TUPDATE]) 33062306a36Sopenharmony_ci q->p_params.tupdate = 33162306a36Sopenharmony_ci usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE])); 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_ALPHA]) 33462306a36Sopenharmony_ci q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]); 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_BETA]) 33762306a36Sopenharmony_ci q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]); 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_QUANTUM]) 34062306a36Sopenharmony_ci q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]); 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_MEMORY_LIMIT]) 34362306a36Sopenharmony_ci q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]); 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_ECN_PROB]) 34662306a36Sopenharmony_ci q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_ECN]) 34962306a36Sopenharmony_ci q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]); 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_BYTEMODE]) 35262306a36Sopenharmony_ci q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]); 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]) 35562306a36Sopenharmony_ci q->p_params.dq_rate_estimator = 35662306a36Sopenharmony_ci nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]); 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci /* Drop excess packets if new limit is lower */ 35962306a36Sopenharmony_ci while (sch->q.qlen > sch->limit) { 36062306a36Sopenharmony_ci struct sk_buff *skb = fq_pie_qdisc_dequeue(sch); 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci len_dropped += qdisc_pkt_len(skb); 36362306a36Sopenharmony_ci num_dropped += 1; 36462306a36Sopenharmony_ci rtnl_kfree_skbs(skb, skb); 36562306a36Sopenharmony_ci } 36662306a36Sopenharmony_ci qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped); 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci sch_tree_unlock(sch); 36962306a36Sopenharmony_ci return 0; 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ciflow_error: 37262306a36Sopenharmony_ci sch_tree_unlock(sch); 37362306a36Sopenharmony_ci return -EINVAL; 37462306a36Sopenharmony_ci} 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_cistatic void fq_pie_timer(struct timer_list *t) 37762306a36Sopenharmony_ci{ 37862306a36Sopenharmony_ci struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer); 37962306a36Sopenharmony_ci unsigned long next, tupdate; 38062306a36Sopenharmony_ci struct Qdisc *sch = q->sch; 38162306a36Sopenharmony_ci spinlock_t *root_lock; /* to lock qdisc for probability calculations */ 38262306a36Sopenharmony_ci int max_cnt, i; 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci rcu_read_lock(); 38562306a36Sopenharmony_ci root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 38662306a36Sopenharmony_ci spin_lock(root_lock); 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci /* Limit this expensive loop to 2048 flows per round. */ 38962306a36Sopenharmony_ci max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048); 39062306a36Sopenharmony_ci for (i = 0; i < max_cnt; i++) { 39162306a36Sopenharmony_ci pie_calculate_probability(&q->p_params, 39262306a36Sopenharmony_ci &q->flows[q->flows_cursor].vars, 39362306a36Sopenharmony_ci q->flows[q->flows_cursor].backlog); 39462306a36Sopenharmony_ci q->flows_cursor++; 39562306a36Sopenharmony_ci } 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci tupdate = q->p_params.tupdate; 39862306a36Sopenharmony_ci next = 0; 39962306a36Sopenharmony_ci if (q->flows_cursor >= q->flows_cnt) { 40062306a36Sopenharmony_ci q->flows_cursor = 0; 40162306a36Sopenharmony_ci next = tupdate; 40262306a36Sopenharmony_ci } 40362306a36Sopenharmony_ci if (tupdate) 40462306a36Sopenharmony_ci mod_timer(&q->adapt_timer, jiffies + next); 40562306a36Sopenharmony_ci spin_unlock(root_lock); 40662306a36Sopenharmony_ci rcu_read_unlock(); 40762306a36Sopenharmony_ci} 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_cistatic int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, 41062306a36Sopenharmony_ci struct netlink_ext_ack *extack) 41162306a36Sopenharmony_ci{ 41262306a36Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 41362306a36Sopenharmony_ci int err; 41462306a36Sopenharmony_ci u32 idx; 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci pie_params_init(&q->p_params); 41762306a36Sopenharmony_ci sch->limit = 10 * 1024; 41862306a36Sopenharmony_ci q->p_params.limit = sch->limit; 41962306a36Sopenharmony_ci q->quantum = psched_mtu(qdisc_dev(sch)); 42062306a36Sopenharmony_ci q->sch = sch; 42162306a36Sopenharmony_ci q->ecn_prob = 10; 42262306a36Sopenharmony_ci q->flows_cnt = 1024; 42362306a36Sopenharmony_ci q->memory_limit = SZ_32M; 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci INIT_LIST_HEAD(&q->new_flows); 42662306a36Sopenharmony_ci INIT_LIST_HEAD(&q->old_flows); 42762306a36Sopenharmony_ci timer_setup(&q->adapt_timer, fq_pie_timer, 0); 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci if (opt) { 43062306a36Sopenharmony_ci err = fq_pie_change(sch, opt, extack); 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci if (err) 43362306a36Sopenharmony_ci return err; 43462306a36Sopenharmony_ci } 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci err = tcf_block_get(&q->block, &q->filter_list, sch, extack); 43762306a36Sopenharmony_ci if (err) 43862306a36Sopenharmony_ci goto init_failure; 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci q->flows = kvcalloc(q->flows_cnt, sizeof(struct fq_pie_flow), 44162306a36Sopenharmony_ci GFP_KERNEL); 44262306a36Sopenharmony_ci if (!q->flows) { 44362306a36Sopenharmony_ci err = -ENOMEM; 44462306a36Sopenharmony_ci goto init_failure; 44562306a36Sopenharmony_ci } 44662306a36Sopenharmony_ci for (idx = 0; idx < q->flows_cnt; idx++) { 44762306a36Sopenharmony_ci struct fq_pie_flow *flow = q->flows + idx; 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci INIT_LIST_HEAD(&flow->flowchain); 45062306a36Sopenharmony_ci pie_vars_init(&flow->vars); 45162306a36Sopenharmony_ci } 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci mod_timer(&q->adapt_timer, jiffies + HZ / 2); 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci return 0; 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ciinit_failure: 45862306a36Sopenharmony_ci q->flows_cnt = 0; 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci return err; 46162306a36Sopenharmony_ci} 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_cistatic int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb) 46462306a36Sopenharmony_ci{ 46562306a36Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 46662306a36Sopenharmony_ci struct nlattr *opts; 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci opts = nla_nest_start(skb, TCA_OPTIONS); 46962306a36Sopenharmony_ci if (!opts) 47062306a36Sopenharmony_ci return -EMSGSIZE; 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_ci /* convert target from pschedtime to us */ 47362306a36Sopenharmony_ci if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) || 47462306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) || 47562306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_TARGET, 47662306a36Sopenharmony_ci ((u32)PSCHED_TICKS2NS(q->p_params.target)) / 47762306a36Sopenharmony_ci NSEC_PER_USEC) || 47862306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_TUPDATE, 47962306a36Sopenharmony_ci jiffies_to_usecs(q->p_params.tupdate)) || 48062306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) || 48162306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) || 48262306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) || 48362306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) || 48462306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) || 48562306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) || 48662306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) || 48762306a36Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR, 48862306a36Sopenharmony_ci q->p_params.dq_rate_estimator)) 48962306a36Sopenharmony_ci goto nla_put_failure; 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci return nla_nest_end(skb, opts); 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_cinla_put_failure: 49462306a36Sopenharmony_ci nla_nest_cancel(skb, opts); 49562306a36Sopenharmony_ci return -EMSGSIZE; 49662306a36Sopenharmony_ci} 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_cistatic int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 49962306a36Sopenharmony_ci{ 50062306a36Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 50162306a36Sopenharmony_ci struct tc_fq_pie_xstats st = { 50262306a36Sopenharmony_ci .packets_in = q->stats.packets_in, 50362306a36Sopenharmony_ci .overlimit = q->stats.overlimit, 50462306a36Sopenharmony_ci .overmemory = q->overmemory, 50562306a36Sopenharmony_ci .dropped = q->stats.dropped, 50662306a36Sopenharmony_ci .ecn_mark = q->stats.ecn_mark, 50762306a36Sopenharmony_ci .new_flow_count = q->new_flow_count, 50862306a36Sopenharmony_ci .memory_usage = q->memory_usage, 50962306a36Sopenharmony_ci }; 51062306a36Sopenharmony_ci struct list_head *pos; 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci sch_tree_lock(sch); 51362306a36Sopenharmony_ci list_for_each(pos, &q->new_flows) 51462306a36Sopenharmony_ci st.new_flows_len++; 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci list_for_each(pos, &q->old_flows) 51762306a36Sopenharmony_ci st.old_flows_len++; 51862306a36Sopenharmony_ci sch_tree_unlock(sch); 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci return gnet_stats_copy_app(d, &st, sizeof(st)); 52162306a36Sopenharmony_ci} 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_cistatic void fq_pie_reset(struct Qdisc *sch) 52462306a36Sopenharmony_ci{ 52562306a36Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 52662306a36Sopenharmony_ci u32 idx; 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci INIT_LIST_HEAD(&q->new_flows); 52962306a36Sopenharmony_ci INIT_LIST_HEAD(&q->old_flows); 53062306a36Sopenharmony_ci for (idx = 0; idx < q->flows_cnt; idx++) { 53162306a36Sopenharmony_ci struct fq_pie_flow *flow = q->flows + idx; 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci /* Removes all packets from flow */ 53462306a36Sopenharmony_ci rtnl_kfree_skbs(flow->head, flow->tail); 53562306a36Sopenharmony_ci flow->head = NULL; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci INIT_LIST_HEAD(&flow->flowchain); 53862306a36Sopenharmony_ci pie_vars_init(&flow->vars); 53962306a36Sopenharmony_ci } 54062306a36Sopenharmony_ci} 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_cistatic void fq_pie_destroy(struct Qdisc *sch) 54362306a36Sopenharmony_ci{ 54462306a36Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci tcf_block_put(q->block); 54762306a36Sopenharmony_ci q->p_params.tupdate = 0; 54862306a36Sopenharmony_ci del_timer_sync(&q->adapt_timer); 54962306a36Sopenharmony_ci kvfree(q->flows); 55062306a36Sopenharmony_ci} 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_cistatic struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = { 55362306a36Sopenharmony_ci .id = "fq_pie", 55462306a36Sopenharmony_ci .priv_size = sizeof(struct fq_pie_sched_data), 55562306a36Sopenharmony_ci .enqueue = fq_pie_qdisc_enqueue, 55662306a36Sopenharmony_ci .dequeue = fq_pie_qdisc_dequeue, 55762306a36Sopenharmony_ci .peek = qdisc_peek_dequeued, 55862306a36Sopenharmony_ci .init = fq_pie_init, 55962306a36Sopenharmony_ci .destroy = fq_pie_destroy, 56062306a36Sopenharmony_ci .reset = fq_pie_reset, 56162306a36Sopenharmony_ci .change = fq_pie_change, 56262306a36Sopenharmony_ci .dump = fq_pie_dump, 56362306a36Sopenharmony_ci .dump_stats = fq_pie_dump_stats, 56462306a36Sopenharmony_ci .owner = THIS_MODULE, 56562306a36Sopenharmony_ci}; 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_cistatic int __init fq_pie_module_init(void) 56862306a36Sopenharmony_ci{ 56962306a36Sopenharmony_ci return register_qdisc(&fq_pie_qdisc_ops); 57062306a36Sopenharmony_ci} 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_cistatic void __exit fq_pie_module_exit(void) 57362306a36Sopenharmony_ci{ 57462306a36Sopenharmony_ci unregister_qdisc(&fq_pie_qdisc_ops); 57562306a36Sopenharmony_ci} 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_cimodule_init(fq_pie_module_init); 57862306a36Sopenharmony_cimodule_exit(fq_pie_module_exit); 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ciMODULE_DESCRIPTION("Flow Queue Proportional Integral controller Enhanced (FQ-PIE)"); 58162306a36Sopenharmony_ciMODULE_AUTHOR("Mohit P. Tahiliani"); 58262306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 583