18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* Flow Queue PIE discipline 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Copyright (C) 2019 Mohit P. Tahiliani <tahiliani@nitk.edu.in> 58c2ecf20Sopenharmony_ci * Copyright (C) 2019 Sachin D. Patil <sdp.sachin@gmail.com> 68c2ecf20Sopenharmony_ci * Copyright (C) 2019 V. Saicharan <vsaicharan1998@gmail.com> 78c2ecf20Sopenharmony_ci * Copyright (C) 2019 Mohit Bhasi <mohitbhasi1998@gmail.com> 88c2ecf20Sopenharmony_ci * Copyright (C) 2019 Leslie Monis <lesliemonis@gmail.com> 98c2ecf20Sopenharmony_ci * Copyright (C) 2019 Gautam Ramakrishnan <gautamramk@gmail.com> 108c2ecf20Sopenharmony_ci */ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci#include <linux/jhash.h> 138c2ecf20Sopenharmony_ci#include <linux/sizes.h> 148c2ecf20Sopenharmony_ci#include <linux/vmalloc.h> 158c2ecf20Sopenharmony_ci#include <net/pkt_cls.h> 168c2ecf20Sopenharmony_ci#include <net/pie.h> 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_ci/* Flow Queue PIE 198c2ecf20Sopenharmony_ci * 208c2ecf20Sopenharmony_ci * Principles: 218c2ecf20Sopenharmony_ci * - Packets are classified on flows. 228c2ecf20Sopenharmony_ci * - This is a Stochastic model (as we use a hash, several flows might 238c2ecf20Sopenharmony_ci * be hashed to the same slot) 248c2ecf20Sopenharmony_ci * - Each flow has a PIE managed queue. 258c2ecf20Sopenharmony_ci * - Flows are linked onto two (Round Robin) lists, 268c2ecf20Sopenharmony_ci * so that new flows have priority on old ones. 278c2ecf20Sopenharmony_ci * - For a given flow, packets are not reordered. 288c2ecf20Sopenharmony_ci * - Drops during enqueue only. 298c2ecf20Sopenharmony_ci * - ECN capability is off by default. 308c2ecf20Sopenharmony_ci * - ECN threshold (if ECN is enabled) is at 10% by default. 318c2ecf20Sopenharmony_ci * - Uses timestamps to calculate queue delay by default. 328c2ecf20Sopenharmony_ci */ 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci/** 358c2ecf20Sopenharmony_ci * struct fq_pie_flow - contains data for each flow 368c2ecf20Sopenharmony_ci * @vars: pie vars associated with the flow 378c2ecf20Sopenharmony_ci * @deficit: number of remaining byte credits 388c2ecf20Sopenharmony_ci * @backlog: size of data in the flow 398c2ecf20Sopenharmony_ci * @qlen: number of packets in the flow 408c2ecf20Sopenharmony_ci * @flowchain: flowchain for the flow 418c2ecf20Sopenharmony_ci * @head: first packet in the flow 428c2ecf20Sopenharmony_ci * @tail: last packet in the flow 438c2ecf20Sopenharmony_ci */ 448c2ecf20Sopenharmony_cistruct fq_pie_flow { 458c2ecf20Sopenharmony_ci struct pie_vars vars; 468c2ecf20Sopenharmony_ci s32 deficit; 478c2ecf20Sopenharmony_ci u32 backlog; 488c2ecf20Sopenharmony_ci u32 qlen; 498c2ecf20Sopenharmony_ci struct list_head flowchain; 508c2ecf20Sopenharmony_ci struct sk_buff *head; 518c2ecf20Sopenharmony_ci struct sk_buff *tail; 528c2ecf20Sopenharmony_ci}; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_cistruct fq_pie_sched_data { 558c2ecf20Sopenharmony_ci struct tcf_proto __rcu *filter_list; /* optional external classifier */ 568c2ecf20Sopenharmony_ci struct tcf_block *block; 578c2ecf20Sopenharmony_ci struct fq_pie_flow *flows; 588c2ecf20Sopenharmony_ci struct Qdisc *sch; 598c2ecf20Sopenharmony_ci struct list_head old_flows; 608c2ecf20Sopenharmony_ci struct list_head new_flows; 618c2ecf20Sopenharmony_ci struct pie_params p_params; 628c2ecf20Sopenharmony_ci u32 ecn_prob; 638c2ecf20Sopenharmony_ci u32 flows_cnt; 648c2ecf20Sopenharmony_ci u32 flows_cursor; 658c2ecf20Sopenharmony_ci u32 quantum; 668c2ecf20Sopenharmony_ci u32 memory_limit; 678c2ecf20Sopenharmony_ci u32 new_flow_count; 688c2ecf20Sopenharmony_ci u32 memory_usage; 698c2ecf20Sopenharmony_ci u32 overmemory; 708c2ecf20Sopenharmony_ci struct pie_stats stats; 718c2ecf20Sopenharmony_ci struct timer_list adapt_timer; 728c2ecf20Sopenharmony_ci}; 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_cistatic unsigned int fq_pie_hash(const struct fq_pie_sched_data *q, 758c2ecf20Sopenharmony_ci struct sk_buff *skb) 768c2ecf20Sopenharmony_ci{ 778c2ecf20Sopenharmony_ci return reciprocal_scale(skb_get_hash(skb), q->flows_cnt); 788c2ecf20Sopenharmony_ci} 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_cistatic unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch, 818c2ecf20Sopenharmony_ci int *qerr) 828c2ecf20Sopenharmony_ci{ 838c2ecf20Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 848c2ecf20Sopenharmony_ci struct tcf_proto *filter; 858c2ecf20Sopenharmony_ci struct tcf_result res; 868c2ecf20Sopenharmony_ci int result; 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci if (TC_H_MAJ(skb->priority) == sch->handle && 898c2ecf20Sopenharmony_ci TC_H_MIN(skb->priority) > 0 && 908c2ecf20Sopenharmony_ci TC_H_MIN(skb->priority) <= q->flows_cnt) 918c2ecf20Sopenharmony_ci return TC_H_MIN(skb->priority); 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci filter = rcu_dereference_bh(q->filter_list); 948c2ecf20Sopenharmony_ci if (!filter) 958c2ecf20Sopenharmony_ci return fq_pie_hash(q, skb) + 1; 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_ci *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 988c2ecf20Sopenharmony_ci result = tcf_classify(skb, filter, &res, false); 998c2ecf20Sopenharmony_ci if (result >= 0) { 1008c2ecf20Sopenharmony_ci#ifdef CONFIG_NET_CLS_ACT 1018c2ecf20Sopenharmony_ci switch (result) { 1028c2ecf20Sopenharmony_ci case TC_ACT_STOLEN: 1038c2ecf20Sopenharmony_ci case TC_ACT_QUEUED: 1048c2ecf20Sopenharmony_ci case TC_ACT_TRAP: 1058c2ecf20Sopenharmony_ci *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 1068c2ecf20Sopenharmony_ci fallthrough; 1078c2ecf20Sopenharmony_ci case TC_ACT_SHOT: 1088c2ecf20Sopenharmony_ci return 0; 1098c2ecf20Sopenharmony_ci } 1108c2ecf20Sopenharmony_ci#endif 1118c2ecf20Sopenharmony_ci if (TC_H_MIN(res.classid) <= q->flows_cnt) 1128c2ecf20Sopenharmony_ci return TC_H_MIN(res.classid); 1138c2ecf20Sopenharmony_ci } 1148c2ecf20Sopenharmony_ci return 0; 1158c2ecf20Sopenharmony_ci} 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci/* add skb to flow queue (tail add) */ 1188c2ecf20Sopenharmony_cistatic inline void flow_queue_add(struct fq_pie_flow *flow, 1198c2ecf20Sopenharmony_ci struct sk_buff *skb) 1208c2ecf20Sopenharmony_ci{ 1218c2ecf20Sopenharmony_ci if (!flow->head) 1228c2ecf20Sopenharmony_ci flow->head = skb; 1238c2ecf20Sopenharmony_ci else 1248c2ecf20Sopenharmony_ci flow->tail->next = skb; 1258c2ecf20Sopenharmony_ci flow->tail = skb; 1268c2ecf20Sopenharmony_ci skb->next = NULL; 1278c2ecf20Sopenharmony_ci} 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_cistatic int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1308c2ecf20Sopenharmony_ci struct sk_buff **to_free) 1318c2ecf20Sopenharmony_ci{ 1328c2ecf20Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 1338c2ecf20Sopenharmony_ci struct fq_pie_flow *sel_flow; 1348c2ecf20Sopenharmony_ci int ret; 1358c2ecf20Sopenharmony_ci u8 memory_limited = false; 1368c2ecf20Sopenharmony_ci u8 enqueue = false; 1378c2ecf20Sopenharmony_ci u32 pkt_len; 1388c2ecf20Sopenharmony_ci u32 idx; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci /* Classifies packet into corresponding flow */ 1418c2ecf20Sopenharmony_ci idx = fq_pie_classify(skb, sch, &ret); 1428c2ecf20Sopenharmony_ci if (idx == 0) { 1438c2ecf20Sopenharmony_ci if (ret & __NET_XMIT_BYPASS) 1448c2ecf20Sopenharmony_ci qdisc_qstats_drop(sch); 1458c2ecf20Sopenharmony_ci __qdisc_drop(skb, to_free); 1468c2ecf20Sopenharmony_ci return ret; 1478c2ecf20Sopenharmony_ci } 1488c2ecf20Sopenharmony_ci idx--; 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci sel_flow = &q->flows[idx]; 1518c2ecf20Sopenharmony_ci /* Checks whether adding a new packet would exceed memory limit */ 1528c2ecf20Sopenharmony_ci get_pie_cb(skb)->mem_usage = skb->truesize; 1538c2ecf20Sopenharmony_ci memory_limited = q->memory_usage > q->memory_limit + skb->truesize; 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci /* Checks if the qdisc is full */ 1568c2ecf20Sopenharmony_ci if (unlikely(qdisc_qlen(sch) >= sch->limit)) { 1578c2ecf20Sopenharmony_ci q->stats.overlimit++; 1588c2ecf20Sopenharmony_ci goto out; 1598c2ecf20Sopenharmony_ci } else if (unlikely(memory_limited)) { 1608c2ecf20Sopenharmony_ci q->overmemory++; 1618c2ecf20Sopenharmony_ci } 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars, 1648c2ecf20Sopenharmony_ci sel_flow->backlog, skb->len)) { 1658c2ecf20Sopenharmony_ci enqueue = true; 1668c2ecf20Sopenharmony_ci } else if (q->p_params.ecn && 1678c2ecf20Sopenharmony_ci sel_flow->vars.prob <= (MAX_PROB / 100) * q->ecn_prob && 1688c2ecf20Sopenharmony_ci INET_ECN_set_ce(skb)) { 1698c2ecf20Sopenharmony_ci /* If packet is ecn capable, mark it if drop probability 1708c2ecf20Sopenharmony_ci * is lower than the parameter ecn_prob, else drop it. 1718c2ecf20Sopenharmony_ci */ 1728c2ecf20Sopenharmony_ci q->stats.ecn_mark++; 1738c2ecf20Sopenharmony_ci enqueue = true; 1748c2ecf20Sopenharmony_ci } 1758c2ecf20Sopenharmony_ci if (enqueue) { 1768c2ecf20Sopenharmony_ci /* Set enqueue time only when dq_rate_estimator is disabled. */ 1778c2ecf20Sopenharmony_ci if (!q->p_params.dq_rate_estimator) 1788c2ecf20Sopenharmony_ci pie_set_enqueue_time(skb); 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci pkt_len = qdisc_pkt_len(skb); 1818c2ecf20Sopenharmony_ci q->stats.packets_in++; 1828c2ecf20Sopenharmony_ci q->memory_usage += skb->truesize; 1838c2ecf20Sopenharmony_ci sch->qstats.backlog += pkt_len; 1848c2ecf20Sopenharmony_ci sch->q.qlen++; 1858c2ecf20Sopenharmony_ci flow_queue_add(sel_flow, skb); 1868c2ecf20Sopenharmony_ci if (list_empty(&sel_flow->flowchain)) { 1878c2ecf20Sopenharmony_ci list_add_tail(&sel_flow->flowchain, &q->new_flows); 1888c2ecf20Sopenharmony_ci q->new_flow_count++; 1898c2ecf20Sopenharmony_ci sel_flow->deficit = q->quantum; 1908c2ecf20Sopenharmony_ci sel_flow->qlen = 0; 1918c2ecf20Sopenharmony_ci sel_flow->backlog = 0; 1928c2ecf20Sopenharmony_ci } 1938c2ecf20Sopenharmony_ci sel_flow->qlen++; 1948c2ecf20Sopenharmony_ci sel_flow->backlog += pkt_len; 1958c2ecf20Sopenharmony_ci return NET_XMIT_SUCCESS; 1968c2ecf20Sopenharmony_ci } 1978c2ecf20Sopenharmony_ciout: 1988c2ecf20Sopenharmony_ci q->stats.dropped++; 1998c2ecf20Sopenharmony_ci sel_flow->vars.accu_prob = 0; 2008c2ecf20Sopenharmony_ci __qdisc_drop(skb, to_free); 2018c2ecf20Sopenharmony_ci qdisc_qstats_drop(sch); 2028c2ecf20Sopenharmony_ci return NET_XMIT_CN; 2038c2ecf20Sopenharmony_ci} 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_cistatic struct netlink_range_validation fq_pie_q_range = { 2068c2ecf20Sopenharmony_ci .min = 1, 2078c2ecf20Sopenharmony_ci .max = 1 << 20, 2088c2ecf20Sopenharmony_ci}; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_cistatic const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = { 2118c2ecf20Sopenharmony_ci [TCA_FQ_PIE_LIMIT] = {.type = NLA_U32}, 2128c2ecf20Sopenharmony_ci [TCA_FQ_PIE_FLOWS] = {.type = NLA_U32}, 2138c2ecf20Sopenharmony_ci [TCA_FQ_PIE_TARGET] = {.type = NLA_U32}, 2148c2ecf20Sopenharmony_ci [TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32}, 2158c2ecf20Sopenharmony_ci [TCA_FQ_PIE_ALPHA] = {.type = NLA_U32}, 2168c2ecf20Sopenharmony_ci [TCA_FQ_PIE_BETA] = {.type = NLA_U32}, 2178c2ecf20Sopenharmony_ci [TCA_FQ_PIE_QUANTUM] = 2188c2ecf20Sopenharmony_ci NLA_POLICY_FULL_RANGE(NLA_U32, &fq_pie_q_range), 2198c2ecf20Sopenharmony_ci [TCA_FQ_PIE_MEMORY_LIMIT] = {.type = NLA_U32}, 2208c2ecf20Sopenharmony_ci [TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32}, 2218c2ecf20Sopenharmony_ci [TCA_FQ_PIE_ECN] = {.type = NLA_U32}, 2228c2ecf20Sopenharmony_ci [TCA_FQ_PIE_BYTEMODE] = {.type = NLA_U32}, 2238c2ecf20Sopenharmony_ci [TCA_FQ_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32}, 2248c2ecf20Sopenharmony_ci}; 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_cistatic inline struct sk_buff *dequeue_head(struct fq_pie_flow *flow) 2278c2ecf20Sopenharmony_ci{ 2288c2ecf20Sopenharmony_ci struct sk_buff *skb = flow->head; 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci flow->head = skb->next; 2318c2ecf20Sopenharmony_ci skb->next = NULL; 2328c2ecf20Sopenharmony_ci return skb; 2338c2ecf20Sopenharmony_ci} 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_cistatic struct sk_buff *fq_pie_qdisc_dequeue(struct Qdisc *sch) 2368c2ecf20Sopenharmony_ci{ 2378c2ecf20Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 2388c2ecf20Sopenharmony_ci struct sk_buff *skb = NULL; 2398c2ecf20Sopenharmony_ci struct fq_pie_flow *flow; 2408c2ecf20Sopenharmony_ci struct list_head *head; 2418c2ecf20Sopenharmony_ci u32 pkt_len; 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_cibegin: 2448c2ecf20Sopenharmony_ci head = &q->new_flows; 2458c2ecf20Sopenharmony_ci if (list_empty(head)) { 2468c2ecf20Sopenharmony_ci head = &q->old_flows; 2478c2ecf20Sopenharmony_ci if (list_empty(head)) 2488c2ecf20Sopenharmony_ci return NULL; 2498c2ecf20Sopenharmony_ci } 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci flow = list_first_entry(head, struct fq_pie_flow, flowchain); 2528c2ecf20Sopenharmony_ci /* Flow has exhausted all its credits */ 2538c2ecf20Sopenharmony_ci if (flow->deficit <= 0) { 2548c2ecf20Sopenharmony_ci flow->deficit += q->quantum; 2558c2ecf20Sopenharmony_ci list_move_tail(&flow->flowchain, &q->old_flows); 2568c2ecf20Sopenharmony_ci goto begin; 2578c2ecf20Sopenharmony_ci } 2588c2ecf20Sopenharmony_ci 2598c2ecf20Sopenharmony_ci if (flow->head) { 2608c2ecf20Sopenharmony_ci skb = dequeue_head(flow); 2618c2ecf20Sopenharmony_ci pkt_len = qdisc_pkt_len(skb); 2628c2ecf20Sopenharmony_ci sch->qstats.backlog -= pkt_len; 2638c2ecf20Sopenharmony_ci sch->q.qlen--; 2648c2ecf20Sopenharmony_ci qdisc_bstats_update(sch, skb); 2658c2ecf20Sopenharmony_ci } 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci if (!skb) { 2688c2ecf20Sopenharmony_ci /* force a pass through old_flows to prevent starvation */ 2698c2ecf20Sopenharmony_ci if (head == &q->new_flows && !list_empty(&q->old_flows)) 2708c2ecf20Sopenharmony_ci list_move_tail(&flow->flowchain, &q->old_flows); 2718c2ecf20Sopenharmony_ci else 2728c2ecf20Sopenharmony_ci list_del_init(&flow->flowchain); 2738c2ecf20Sopenharmony_ci goto begin; 2748c2ecf20Sopenharmony_ci } 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci flow->qlen--; 2778c2ecf20Sopenharmony_ci flow->deficit -= pkt_len; 2788c2ecf20Sopenharmony_ci flow->backlog -= pkt_len; 2798c2ecf20Sopenharmony_ci q->memory_usage -= get_pie_cb(skb)->mem_usage; 2808c2ecf20Sopenharmony_ci pie_process_dequeue(skb, &q->p_params, &flow->vars, flow->backlog); 2818c2ecf20Sopenharmony_ci return skb; 2828c2ecf20Sopenharmony_ci} 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_cistatic int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, 2858c2ecf20Sopenharmony_ci struct netlink_ext_ack *extack) 2868c2ecf20Sopenharmony_ci{ 2878c2ecf20Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 2888c2ecf20Sopenharmony_ci struct nlattr *tb[TCA_FQ_PIE_MAX + 1]; 2898c2ecf20Sopenharmony_ci unsigned int len_dropped = 0; 2908c2ecf20Sopenharmony_ci unsigned int num_dropped = 0; 2918c2ecf20Sopenharmony_ci int err; 2928c2ecf20Sopenharmony_ci 2938c2ecf20Sopenharmony_ci if (!opt) 2948c2ecf20Sopenharmony_ci return -EINVAL; 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack); 2978c2ecf20Sopenharmony_ci if (err < 0) 2988c2ecf20Sopenharmony_ci return err; 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci sch_tree_lock(sch); 3018c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_LIMIT]) { 3028c2ecf20Sopenharmony_ci u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]); 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci q->p_params.limit = limit; 3058c2ecf20Sopenharmony_ci sch->limit = limit; 3068c2ecf20Sopenharmony_ci } 3078c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_FLOWS]) { 3088c2ecf20Sopenharmony_ci if (q->flows) { 3098c2ecf20Sopenharmony_ci NL_SET_ERR_MSG_MOD(extack, 3108c2ecf20Sopenharmony_ci "Number of flows cannot be changed"); 3118c2ecf20Sopenharmony_ci goto flow_error; 3128c2ecf20Sopenharmony_ci } 3138c2ecf20Sopenharmony_ci q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]); 3148c2ecf20Sopenharmony_ci if (!q->flows_cnt || q->flows_cnt > 65536) { 3158c2ecf20Sopenharmony_ci NL_SET_ERR_MSG_MOD(extack, 3168c2ecf20Sopenharmony_ci "Number of flows must range in [1..65536]"); 3178c2ecf20Sopenharmony_ci goto flow_error; 3188c2ecf20Sopenharmony_ci } 3198c2ecf20Sopenharmony_ci } 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci /* convert from microseconds to pschedtime */ 3228c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_TARGET]) { 3238c2ecf20Sopenharmony_ci /* target is in us */ 3248c2ecf20Sopenharmony_ci u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]); 3258c2ecf20Sopenharmony_ci 3268c2ecf20Sopenharmony_ci /* convert to pschedtime */ 3278c2ecf20Sopenharmony_ci q->p_params.target = 3288c2ecf20Sopenharmony_ci PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC); 3298c2ecf20Sopenharmony_ci } 3308c2ecf20Sopenharmony_ci 3318c2ecf20Sopenharmony_ci /* tupdate is in jiffies */ 3328c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_TUPDATE]) 3338c2ecf20Sopenharmony_ci q->p_params.tupdate = 3348c2ecf20Sopenharmony_ci usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE])); 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_ALPHA]) 3378c2ecf20Sopenharmony_ci q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]); 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_BETA]) 3408c2ecf20Sopenharmony_ci q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]); 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_QUANTUM]) 3438c2ecf20Sopenharmony_ci q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]); 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_MEMORY_LIMIT]) 3468c2ecf20Sopenharmony_ci q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]); 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_ECN_PROB]) 3498c2ecf20Sopenharmony_ci q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]); 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_ECN]) 3528c2ecf20Sopenharmony_ci q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]); 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_BYTEMODE]) 3558c2ecf20Sopenharmony_ci q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]); 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]) 3588c2ecf20Sopenharmony_ci q->p_params.dq_rate_estimator = 3598c2ecf20Sopenharmony_ci nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]); 3608c2ecf20Sopenharmony_ci 3618c2ecf20Sopenharmony_ci /* Drop excess packets if new limit is lower */ 3628c2ecf20Sopenharmony_ci while (sch->q.qlen > sch->limit) { 3638c2ecf20Sopenharmony_ci struct sk_buff *skb = fq_pie_qdisc_dequeue(sch); 3648c2ecf20Sopenharmony_ci 3658c2ecf20Sopenharmony_ci len_dropped += qdisc_pkt_len(skb); 3668c2ecf20Sopenharmony_ci num_dropped += 1; 3678c2ecf20Sopenharmony_ci rtnl_kfree_skbs(skb, skb); 3688c2ecf20Sopenharmony_ci } 3698c2ecf20Sopenharmony_ci qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped); 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_ci sch_tree_unlock(sch); 3728c2ecf20Sopenharmony_ci return 0; 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_ciflow_error: 3758c2ecf20Sopenharmony_ci sch_tree_unlock(sch); 3768c2ecf20Sopenharmony_ci return -EINVAL; 3778c2ecf20Sopenharmony_ci} 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_cistatic void fq_pie_timer(struct timer_list *t) 3808c2ecf20Sopenharmony_ci{ 3818c2ecf20Sopenharmony_ci struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer); 3828c2ecf20Sopenharmony_ci unsigned long next, tupdate; 3838c2ecf20Sopenharmony_ci struct Qdisc *sch = q->sch; 3848c2ecf20Sopenharmony_ci spinlock_t *root_lock; /* to lock qdisc for probability calculations */ 3858c2ecf20Sopenharmony_ci int max_cnt, i; 3868c2ecf20Sopenharmony_ci 3878c2ecf20Sopenharmony_ci root_lock = qdisc_lock(qdisc_root_sleeping(sch)); 3888c2ecf20Sopenharmony_ci spin_lock(root_lock); 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci /* Limit this expensive loop to 2048 flows per round. */ 3918c2ecf20Sopenharmony_ci max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048); 3928c2ecf20Sopenharmony_ci for (i = 0; i < max_cnt; i++) { 3938c2ecf20Sopenharmony_ci pie_calculate_probability(&q->p_params, 3948c2ecf20Sopenharmony_ci &q->flows[q->flows_cursor].vars, 3958c2ecf20Sopenharmony_ci q->flows[q->flows_cursor].backlog); 3968c2ecf20Sopenharmony_ci q->flows_cursor++; 3978c2ecf20Sopenharmony_ci } 3988c2ecf20Sopenharmony_ci 3998c2ecf20Sopenharmony_ci tupdate = q->p_params.tupdate; 4008c2ecf20Sopenharmony_ci next = 0; 4018c2ecf20Sopenharmony_ci if (q->flows_cursor >= q->flows_cnt) { 4028c2ecf20Sopenharmony_ci q->flows_cursor = 0; 4038c2ecf20Sopenharmony_ci next = tupdate; 4048c2ecf20Sopenharmony_ci } 4058c2ecf20Sopenharmony_ci if (tupdate) 4068c2ecf20Sopenharmony_ci mod_timer(&q->adapt_timer, jiffies + next); 4078c2ecf20Sopenharmony_ci spin_unlock(root_lock); 4088c2ecf20Sopenharmony_ci} 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_cistatic int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, 4118c2ecf20Sopenharmony_ci struct netlink_ext_ack *extack) 4128c2ecf20Sopenharmony_ci{ 4138c2ecf20Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 4148c2ecf20Sopenharmony_ci int err; 4158c2ecf20Sopenharmony_ci u32 idx; 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_ci pie_params_init(&q->p_params); 4188c2ecf20Sopenharmony_ci sch->limit = 10 * 1024; 4198c2ecf20Sopenharmony_ci q->p_params.limit = sch->limit; 4208c2ecf20Sopenharmony_ci q->quantum = psched_mtu(qdisc_dev(sch)); 4218c2ecf20Sopenharmony_ci q->sch = sch; 4228c2ecf20Sopenharmony_ci q->ecn_prob = 10; 4238c2ecf20Sopenharmony_ci q->flows_cnt = 1024; 4248c2ecf20Sopenharmony_ci q->memory_limit = SZ_32M; 4258c2ecf20Sopenharmony_ci 4268c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&q->new_flows); 4278c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&q->old_flows); 4288c2ecf20Sopenharmony_ci timer_setup(&q->adapt_timer, fq_pie_timer, 0); 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_ci if (opt) { 4318c2ecf20Sopenharmony_ci err = fq_pie_change(sch, opt, extack); 4328c2ecf20Sopenharmony_ci 4338c2ecf20Sopenharmony_ci if (err) 4348c2ecf20Sopenharmony_ci return err; 4358c2ecf20Sopenharmony_ci } 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_ci err = tcf_block_get(&q->block, &q->filter_list, sch, extack); 4388c2ecf20Sopenharmony_ci if (err) 4398c2ecf20Sopenharmony_ci goto init_failure; 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_ci q->flows = kvcalloc(q->flows_cnt, sizeof(struct fq_pie_flow), 4428c2ecf20Sopenharmony_ci GFP_KERNEL); 4438c2ecf20Sopenharmony_ci if (!q->flows) { 4448c2ecf20Sopenharmony_ci err = -ENOMEM; 4458c2ecf20Sopenharmony_ci goto init_failure; 4468c2ecf20Sopenharmony_ci } 4478c2ecf20Sopenharmony_ci for (idx = 0; idx < q->flows_cnt; idx++) { 4488c2ecf20Sopenharmony_ci struct fq_pie_flow *flow = q->flows + idx; 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&flow->flowchain); 4518c2ecf20Sopenharmony_ci pie_vars_init(&flow->vars); 4528c2ecf20Sopenharmony_ci } 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci mod_timer(&q->adapt_timer, jiffies + HZ / 2); 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_ci return 0; 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ciinit_failure: 4598c2ecf20Sopenharmony_ci q->flows_cnt = 0; 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci return err; 4628c2ecf20Sopenharmony_ci} 4638c2ecf20Sopenharmony_ci 4648c2ecf20Sopenharmony_cistatic int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb) 4658c2ecf20Sopenharmony_ci{ 4668c2ecf20Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 4678c2ecf20Sopenharmony_ci struct nlattr *opts; 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci opts = nla_nest_start(skb, TCA_OPTIONS); 4708c2ecf20Sopenharmony_ci if (!opts) 4718c2ecf20Sopenharmony_ci return -EMSGSIZE; 4728c2ecf20Sopenharmony_ci 4738c2ecf20Sopenharmony_ci /* convert target from pschedtime to us */ 4748c2ecf20Sopenharmony_ci if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) || 4758c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) || 4768c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_TARGET, 4778c2ecf20Sopenharmony_ci ((u32)PSCHED_TICKS2NS(q->p_params.target)) / 4788c2ecf20Sopenharmony_ci NSEC_PER_USEC) || 4798c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_TUPDATE, 4808c2ecf20Sopenharmony_ci jiffies_to_usecs(q->p_params.tupdate)) || 4818c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) || 4828c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) || 4838c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) || 4848c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) || 4858c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) || 4868c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) || 4878c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) || 4888c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR, 4898c2ecf20Sopenharmony_ci q->p_params.dq_rate_estimator)) 4908c2ecf20Sopenharmony_ci goto nla_put_failure; 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci return nla_nest_end(skb, opts); 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_cinla_put_failure: 4958c2ecf20Sopenharmony_ci nla_nest_cancel(skb, opts); 4968c2ecf20Sopenharmony_ci return -EMSGSIZE; 4978c2ecf20Sopenharmony_ci} 4988c2ecf20Sopenharmony_ci 4998c2ecf20Sopenharmony_cistatic int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 5008c2ecf20Sopenharmony_ci{ 5018c2ecf20Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 5028c2ecf20Sopenharmony_ci struct tc_fq_pie_xstats st = { 5038c2ecf20Sopenharmony_ci .packets_in = q->stats.packets_in, 5048c2ecf20Sopenharmony_ci .overlimit = q->stats.overlimit, 5058c2ecf20Sopenharmony_ci .overmemory = q->overmemory, 5068c2ecf20Sopenharmony_ci .dropped = q->stats.dropped, 5078c2ecf20Sopenharmony_ci .ecn_mark = q->stats.ecn_mark, 5088c2ecf20Sopenharmony_ci .new_flow_count = q->new_flow_count, 5098c2ecf20Sopenharmony_ci .memory_usage = q->memory_usage, 5108c2ecf20Sopenharmony_ci }; 5118c2ecf20Sopenharmony_ci struct list_head *pos; 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci sch_tree_lock(sch); 5148c2ecf20Sopenharmony_ci list_for_each(pos, &q->new_flows) 5158c2ecf20Sopenharmony_ci st.new_flows_len++; 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci list_for_each(pos, &q->old_flows) 5188c2ecf20Sopenharmony_ci st.old_flows_len++; 5198c2ecf20Sopenharmony_ci sch_tree_unlock(sch); 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci return gnet_stats_copy_app(d, &st, sizeof(st)); 5228c2ecf20Sopenharmony_ci} 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_cistatic void fq_pie_reset(struct Qdisc *sch) 5258c2ecf20Sopenharmony_ci{ 5268c2ecf20Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 5278c2ecf20Sopenharmony_ci u32 idx; 5288c2ecf20Sopenharmony_ci 5298c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&q->new_flows); 5308c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&q->old_flows); 5318c2ecf20Sopenharmony_ci for (idx = 0; idx < q->flows_cnt; idx++) { 5328c2ecf20Sopenharmony_ci struct fq_pie_flow *flow = q->flows + idx; 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_ci /* Removes all packets from flow */ 5358c2ecf20Sopenharmony_ci rtnl_kfree_skbs(flow->head, flow->tail); 5368c2ecf20Sopenharmony_ci flow->head = NULL; 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&flow->flowchain); 5398c2ecf20Sopenharmony_ci pie_vars_init(&flow->vars); 5408c2ecf20Sopenharmony_ci } 5418c2ecf20Sopenharmony_ci} 5428c2ecf20Sopenharmony_ci 5438c2ecf20Sopenharmony_cistatic void fq_pie_destroy(struct Qdisc *sch) 5448c2ecf20Sopenharmony_ci{ 5458c2ecf20Sopenharmony_ci struct fq_pie_sched_data *q = qdisc_priv(sch); 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci tcf_block_put(q->block); 5488c2ecf20Sopenharmony_ci q->p_params.tupdate = 0; 5498c2ecf20Sopenharmony_ci del_timer_sync(&q->adapt_timer); 5508c2ecf20Sopenharmony_ci kvfree(q->flows); 5518c2ecf20Sopenharmony_ci} 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_cistatic struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = { 5548c2ecf20Sopenharmony_ci .id = "fq_pie", 5558c2ecf20Sopenharmony_ci .priv_size = sizeof(struct fq_pie_sched_data), 5568c2ecf20Sopenharmony_ci .enqueue = fq_pie_qdisc_enqueue, 5578c2ecf20Sopenharmony_ci .dequeue = fq_pie_qdisc_dequeue, 5588c2ecf20Sopenharmony_ci .peek = qdisc_peek_dequeued, 5598c2ecf20Sopenharmony_ci .init = fq_pie_init, 5608c2ecf20Sopenharmony_ci .destroy = fq_pie_destroy, 5618c2ecf20Sopenharmony_ci .reset = fq_pie_reset, 5628c2ecf20Sopenharmony_ci .change = fq_pie_change, 5638c2ecf20Sopenharmony_ci .dump = fq_pie_dump, 5648c2ecf20Sopenharmony_ci .dump_stats = fq_pie_dump_stats, 5658c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 5668c2ecf20Sopenharmony_ci}; 5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_cistatic int __init fq_pie_module_init(void) 5698c2ecf20Sopenharmony_ci{ 5708c2ecf20Sopenharmony_ci return register_qdisc(&fq_pie_qdisc_ops); 5718c2ecf20Sopenharmony_ci} 5728c2ecf20Sopenharmony_ci 5738c2ecf20Sopenharmony_cistatic void __exit fq_pie_module_exit(void) 5748c2ecf20Sopenharmony_ci{ 5758c2ecf20Sopenharmony_ci unregister_qdisc(&fq_pie_qdisc_ops); 5768c2ecf20Sopenharmony_ci} 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_cimodule_init(fq_pie_module_init); 5798c2ecf20Sopenharmony_cimodule_exit(fq_pie_module_exit); 5808c2ecf20Sopenharmony_ci 5818c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Flow Queue Proportional Integral controller Enhanced (FQ-PIE)"); 5828c2ecf20Sopenharmony_ciMODULE_AUTHOR("Mohit P. Tahiliani"); 5838c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 584