18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/* Flow Queue PIE discipline
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Copyright (C) 2019 Mohit P. Tahiliani <tahiliani@nitk.edu.in>
58c2ecf20Sopenharmony_ci * Copyright (C) 2019 Sachin D. Patil <sdp.sachin@gmail.com>
68c2ecf20Sopenharmony_ci * Copyright (C) 2019 V. Saicharan <vsaicharan1998@gmail.com>
78c2ecf20Sopenharmony_ci * Copyright (C) 2019 Mohit Bhasi <mohitbhasi1998@gmail.com>
88c2ecf20Sopenharmony_ci * Copyright (C) 2019 Leslie Monis <lesliemonis@gmail.com>
98c2ecf20Sopenharmony_ci * Copyright (C) 2019 Gautam Ramakrishnan <gautamramk@gmail.com>
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/jhash.h>
138c2ecf20Sopenharmony_ci#include <linux/sizes.h>
148c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
158c2ecf20Sopenharmony_ci#include <net/pkt_cls.h>
168c2ecf20Sopenharmony_ci#include <net/pie.h>
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_ci/* Flow Queue PIE
198c2ecf20Sopenharmony_ci *
208c2ecf20Sopenharmony_ci * Principles:
218c2ecf20Sopenharmony_ci *   - Packets are classified on flows.
228c2ecf20Sopenharmony_ci *   - This is a Stochastic model (as we use a hash, several flows might
238c2ecf20Sopenharmony_ci *                                 be hashed to the same slot)
248c2ecf20Sopenharmony_ci *   - Each flow has a PIE managed queue.
258c2ecf20Sopenharmony_ci *   - Flows are linked onto two (Round Robin) lists,
268c2ecf20Sopenharmony_ci *     so that new flows have priority on old ones.
278c2ecf20Sopenharmony_ci *   - For a given flow, packets are not reordered.
288c2ecf20Sopenharmony_ci *   - Drops during enqueue only.
298c2ecf20Sopenharmony_ci *   - ECN capability is off by default.
308c2ecf20Sopenharmony_ci *   - ECN threshold (if ECN is enabled) is at 10% by default.
318c2ecf20Sopenharmony_ci *   - Uses timestamps to calculate queue delay by default.
328c2ecf20Sopenharmony_ci */
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_ci/**
358c2ecf20Sopenharmony_ci * struct fq_pie_flow - contains data for each flow
368c2ecf20Sopenharmony_ci * @vars:	pie vars associated with the flow
378c2ecf20Sopenharmony_ci * @deficit:	number of remaining byte credits
388c2ecf20Sopenharmony_ci * @backlog:	size of data in the flow
398c2ecf20Sopenharmony_ci * @qlen:	number of packets in the flow
408c2ecf20Sopenharmony_ci * @flowchain:	flowchain for the flow
418c2ecf20Sopenharmony_ci * @head:	first packet in the flow
428c2ecf20Sopenharmony_ci * @tail:	last packet in the flow
438c2ecf20Sopenharmony_ci */
448c2ecf20Sopenharmony_cistruct fq_pie_flow {
458c2ecf20Sopenharmony_ci	struct pie_vars vars;
468c2ecf20Sopenharmony_ci	s32 deficit;
478c2ecf20Sopenharmony_ci	u32 backlog;
488c2ecf20Sopenharmony_ci	u32 qlen;
498c2ecf20Sopenharmony_ci	struct list_head flowchain;
508c2ecf20Sopenharmony_ci	struct sk_buff *head;
518c2ecf20Sopenharmony_ci	struct sk_buff *tail;
528c2ecf20Sopenharmony_ci};
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_cistruct fq_pie_sched_data {
558c2ecf20Sopenharmony_ci	struct tcf_proto __rcu *filter_list; /* optional external classifier */
568c2ecf20Sopenharmony_ci	struct tcf_block *block;
578c2ecf20Sopenharmony_ci	struct fq_pie_flow *flows;
588c2ecf20Sopenharmony_ci	struct Qdisc *sch;
598c2ecf20Sopenharmony_ci	struct list_head old_flows;
608c2ecf20Sopenharmony_ci	struct list_head new_flows;
618c2ecf20Sopenharmony_ci	struct pie_params p_params;
628c2ecf20Sopenharmony_ci	u32 ecn_prob;
638c2ecf20Sopenharmony_ci	u32 flows_cnt;
648c2ecf20Sopenharmony_ci	u32 flows_cursor;
658c2ecf20Sopenharmony_ci	u32 quantum;
668c2ecf20Sopenharmony_ci	u32 memory_limit;
678c2ecf20Sopenharmony_ci	u32 new_flow_count;
688c2ecf20Sopenharmony_ci	u32 memory_usage;
698c2ecf20Sopenharmony_ci	u32 overmemory;
708c2ecf20Sopenharmony_ci	struct pie_stats stats;
718c2ecf20Sopenharmony_ci	struct timer_list adapt_timer;
728c2ecf20Sopenharmony_ci};
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_cistatic unsigned int fq_pie_hash(const struct fq_pie_sched_data *q,
758c2ecf20Sopenharmony_ci				struct sk_buff *skb)
768c2ecf20Sopenharmony_ci{
778c2ecf20Sopenharmony_ci	return reciprocal_scale(skb_get_hash(skb), q->flows_cnt);
788c2ecf20Sopenharmony_ci}
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_cistatic unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
818c2ecf20Sopenharmony_ci				    int *qerr)
828c2ecf20Sopenharmony_ci{
838c2ecf20Sopenharmony_ci	struct fq_pie_sched_data *q = qdisc_priv(sch);
848c2ecf20Sopenharmony_ci	struct tcf_proto *filter;
858c2ecf20Sopenharmony_ci	struct tcf_result res;
868c2ecf20Sopenharmony_ci	int result;
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci	if (TC_H_MAJ(skb->priority) == sch->handle &&
898c2ecf20Sopenharmony_ci	    TC_H_MIN(skb->priority) > 0 &&
908c2ecf20Sopenharmony_ci	    TC_H_MIN(skb->priority) <= q->flows_cnt)
918c2ecf20Sopenharmony_ci		return TC_H_MIN(skb->priority);
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	filter = rcu_dereference_bh(q->filter_list);
948c2ecf20Sopenharmony_ci	if (!filter)
958c2ecf20Sopenharmony_ci		return fq_pie_hash(q, skb) + 1;
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
988c2ecf20Sopenharmony_ci	result = tcf_classify(skb, filter, &res, false);
998c2ecf20Sopenharmony_ci	if (result >= 0) {
1008c2ecf20Sopenharmony_ci#ifdef CONFIG_NET_CLS_ACT
1018c2ecf20Sopenharmony_ci		switch (result) {
1028c2ecf20Sopenharmony_ci		case TC_ACT_STOLEN:
1038c2ecf20Sopenharmony_ci		case TC_ACT_QUEUED:
1048c2ecf20Sopenharmony_ci		case TC_ACT_TRAP:
1058c2ecf20Sopenharmony_ci			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
1068c2ecf20Sopenharmony_ci			fallthrough;
1078c2ecf20Sopenharmony_ci		case TC_ACT_SHOT:
1088c2ecf20Sopenharmony_ci			return 0;
1098c2ecf20Sopenharmony_ci		}
1108c2ecf20Sopenharmony_ci#endif
1118c2ecf20Sopenharmony_ci		if (TC_H_MIN(res.classid) <= q->flows_cnt)
1128c2ecf20Sopenharmony_ci			return TC_H_MIN(res.classid);
1138c2ecf20Sopenharmony_ci	}
1148c2ecf20Sopenharmony_ci	return 0;
1158c2ecf20Sopenharmony_ci}
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci/* add skb to flow queue (tail add) */
1188c2ecf20Sopenharmony_cistatic inline void flow_queue_add(struct fq_pie_flow *flow,
1198c2ecf20Sopenharmony_ci				  struct sk_buff *skb)
1208c2ecf20Sopenharmony_ci{
1218c2ecf20Sopenharmony_ci	if (!flow->head)
1228c2ecf20Sopenharmony_ci		flow->head = skb;
1238c2ecf20Sopenharmony_ci	else
1248c2ecf20Sopenharmony_ci		flow->tail->next = skb;
1258c2ecf20Sopenharmony_ci	flow->tail = skb;
1268c2ecf20Sopenharmony_ci	skb->next = NULL;
1278c2ecf20Sopenharmony_ci}
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_cistatic int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1308c2ecf20Sopenharmony_ci				struct sk_buff **to_free)
1318c2ecf20Sopenharmony_ci{
1328c2ecf20Sopenharmony_ci	struct fq_pie_sched_data *q = qdisc_priv(sch);
1338c2ecf20Sopenharmony_ci	struct fq_pie_flow *sel_flow;
1348c2ecf20Sopenharmony_ci	int ret;
1358c2ecf20Sopenharmony_ci	u8 memory_limited = false;
1368c2ecf20Sopenharmony_ci	u8 enqueue = false;
1378c2ecf20Sopenharmony_ci	u32 pkt_len;
1388c2ecf20Sopenharmony_ci	u32 idx;
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	/* Classifies packet into corresponding flow */
1418c2ecf20Sopenharmony_ci	idx = fq_pie_classify(skb, sch, &ret);
1428c2ecf20Sopenharmony_ci	if (idx == 0) {
1438c2ecf20Sopenharmony_ci		if (ret & __NET_XMIT_BYPASS)
1448c2ecf20Sopenharmony_ci			qdisc_qstats_drop(sch);
1458c2ecf20Sopenharmony_ci		__qdisc_drop(skb, to_free);
1468c2ecf20Sopenharmony_ci		return ret;
1478c2ecf20Sopenharmony_ci	}
1488c2ecf20Sopenharmony_ci	idx--;
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	sel_flow = &q->flows[idx];
1518c2ecf20Sopenharmony_ci	/* Checks whether adding a new packet would exceed memory limit */
1528c2ecf20Sopenharmony_ci	get_pie_cb(skb)->mem_usage = skb->truesize;
1538c2ecf20Sopenharmony_ci	memory_limited = q->memory_usage > q->memory_limit + skb->truesize;
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	/* Checks if the qdisc is full */
1568c2ecf20Sopenharmony_ci	if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
1578c2ecf20Sopenharmony_ci		q->stats.overlimit++;
1588c2ecf20Sopenharmony_ci		goto out;
1598c2ecf20Sopenharmony_ci	} else if (unlikely(memory_limited)) {
1608c2ecf20Sopenharmony_ci		q->overmemory++;
1618c2ecf20Sopenharmony_ci	}
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars,
1648c2ecf20Sopenharmony_ci			    sel_flow->backlog, skb->len)) {
1658c2ecf20Sopenharmony_ci		enqueue = true;
1668c2ecf20Sopenharmony_ci	} else if (q->p_params.ecn &&
1678c2ecf20Sopenharmony_ci		   sel_flow->vars.prob <= (MAX_PROB / 100) * q->ecn_prob &&
1688c2ecf20Sopenharmony_ci		   INET_ECN_set_ce(skb)) {
1698c2ecf20Sopenharmony_ci		/* If packet is ecn capable, mark it if drop probability
1708c2ecf20Sopenharmony_ci		 * is lower than the parameter ecn_prob, else drop it.
1718c2ecf20Sopenharmony_ci		 */
1728c2ecf20Sopenharmony_ci		q->stats.ecn_mark++;
1738c2ecf20Sopenharmony_ci		enqueue = true;
1748c2ecf20Sopenharmony_ci	}
1758c2ecf20Sopenharmony_ci	if (enqueue) {
1768c2ecf20Sopenharmony_ci		/* Set enqueue time only when dq_rate_estimator is disabled. */
1778c2ecf20Sopenharmony_ci		if (!q->p_params.dq_rate_estimator)
1788c2ecf20Sopenharmony_ci			pie_set_enqueue_time(skb);
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci		pkt_len = qdisc_pkt_len(skb);
1818c2ecf20Sopenharmony_ci		q->stats.packets_in++;
1828c2ecf20Sopenharmony_ci		q->memory_usage += skb->truesize;
1838c2ecf20Sopenharmony_ci		sch->qstats.backlog += pkt_len;
1848c2ecf20Sopenharmony_ci		sch->q.qlen++;
1858c2ecf20Sopenharmony_ci		flow_queue_add(sel_flow, skb);
1868c2ecf20Sopenharmony_ci		if (list_empty(&sel_flow->flowchain)) {
1878c2ecf20Sopenharmony_ci			list_add_tail(&sel_flow->flowchain, &q->new_flows);
1888c2ecf20Sopenharmony_ci			q->new_flow_count++;
1898c2ecf20Sopenharmony_ci			sel_flow->deficit = q->quantum;
1908c2ecf20Sopenharmony_ci			sel_flow->qlen = 0;
1918c2ecf20Sopenharmony_ci			sel_flow->backlog = 0;
1928c2ecf20Sopenharmony_ci		}
1938c2ecf20Sopenharmony_ci		sel_flow->qlen++;
1948c2ecf20Sopenharmony_ci		sel_flow->backlog += pkt_len;
1958c2ecf20Sopenharmony_ci		return NET_XMIT_SUCCESS;
1968c2ecf20Sopenharmony_ci	}
1978c2ecf20Sopenharmony_ciout:
1988c2ecf20Sopenharmony_ci	q->stats.dropped++;
1998c2ecf20Sopenharmony_ci	sel_flow->vars.accu_prob = 0;
2008c2ecf20Sopenharmony_ci	__qdisc_drop(skb, to_free);
2018c2ecf20Sopenharmony_ci	qdisc_qstats_drop(sch);
2028c2ecf20Sopenharmony_ci	return NET_XMIT_CN;
2038c2ecf20Sopenharmony_ci}
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_cistatic struct netlink_range_validation fq_pie_q_range = {
2068c2ecf20Sopenharmony_ci	.min = 1,
2078c2ecf20Sopenharmony_ci	.max = 1 << 20,
2088c2ecf20Sopenharmony_ci};
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_cistatic const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
2118c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_LIMIT]		= {.type = NLA_U32},
2128c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_FLOWS]		= {.type = NLA_U32},
2138c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_TARGET]		= {.type = NLA_U32},
2148c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_TUPDATE]		= {.type = NLA_U32},
2158c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_ALPHA]		= {.type = NLA_U32},
2168c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_BETA]		= {.type = NLA_U32},
2178c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_QUANTUM]		=
2188c2ecf20Sopenharmony_ci			NLA_POLICY_FULL_RANGE(NLA_U32, &fq_pie_q_range),
2198c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_MEMORY_LIMIT]	= {.type = NLA_U32},
2208c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_ECN_PROB]		= {.type = NLA_U32},
2218c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_ECN]		= {.type = NLA_U32},
2228c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_BYTEMODE]		= {.type = NLA_U32},
2238c2ecf20Sopenharmony_ci	[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]	= {.type = NLA_U32},
2248c2ecf20Sopenharmony_ci};
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_cistatic inline struct sk_buff *dequeue_head(struct fq_pie_flow *flow)
2278c2ecf20Sopenharmony_ci{
2288c2ecf20Sopenharmony_ci	struct sk_buff *skb = flow->head;
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	flow->head = skb->next;
2318c2ecf20Sopenharmony_ci	skb->next = NULL;
2328c2ecf20Sopenharmony_ci	return skb;
2338c2ecf20Sopenharmony_ci}
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_cistatic struct sk_buff *fq_pie_qdisc_dequeue(struct Qdisc *sch)
2368c2ecf20Sopenharmony_ci{
2378c2ecf20Sopenharmony_ci	struct fq_pie_sched_data *q = qdisc_priv(sch);
2388c2ecf20Sopenharmony_ci	struct sk_buff *skb = NULL;
2398c2ecf20Sopenharmony_ci	struct fq_pie_flow *flow;
2408c2ecf20Sopenharmony_ci	struct list_head *head;
2418c2ecf20Sopenharmony_ci	u32 pkt_len;
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_cibegin:
2448c2ecf20Sopenharmony_ci	head = &q->new_flows;
2458c2ecf20Sopenharmony_ci	if (list_empty(head)) {
2468c2ecf20Sopenharmony_ci		head = &q->old_flows;
2478c2ecf20Sopenharmony_ci		if (list_empty(head))
2488c2ecf20Sopenharmony_ci			return NULL;
2498c2ecf20Sopenharmony_ci	}
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci	flow = list_first_entry(head, struct fq_pie_flow, flowchain);
2528c2ecf20Sopenharmony_ci	/* Flow has exhausted all its credits */
2538c2ecf20Sopenharmony_ci	if (flow->deficit <= 0) {
2548c2ecf20Sopenharmony_ci		flow->deficit += q->quantum;
2558c2ecf20Sopenharmony_ci		list_move_tail(&flow->flowchain, &q->old_flows);
2568c2ecf20Sopenharmony_ci		goto begin;
2578c2ecf20Sopenharmony_ci	}
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	if (flow->head) {
2608c2ecf20Sopenharmony_ci		skb = dequeue_head(flow);
2618c2ecf20Sopenharmony_ci		pkt_len = qdisc_pkt_len(skb);
2628c2ecf20Sopenharmony_ci		sch->qstats.backlog -= pkt_len;
2638c2ecf20Sopenharmony_ci		sch->q.qlen--;
2648c2ecf20Sopenharmony_ci		qdisc_bstats_update(sch, skb);
2658c2ecf20Sopenharmony_ci	}
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci	if (!skb) {
2688c2ecf20Sopenharmony_ci		/* force a pass through old_flows to prevent starvation */
2698c2ecf20Sopenharmony_ci		if (head == &q->new_flows && !list_empty(&q->old_flows))
2708c2ecf20Sopenharmony_ci			list_move_tail(&flow->flowchain, &q->old_flows);
2718c2ecf20Sopenharmony_ci		else
2728c2ecf20Sopenharmony_ci			list_del_init(&flow->flowchain);
2738c2ecf20Sopenharmony_ci		goto begin;
2748c2ecf20Sopenharmony_ci	}
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_ci	flow->qlen--;
2778c2ecf20Sopenharmony_ci	flow->deficit -= pkt_len;
2788c2ecf20Sopenharmony_ci	flow->backlog -= pkt_len;
2798c2ecf20Sopenharmony_ci	q->memory_usage -= get_pie_cb(skb)->mem_usage;
2808c2ecf20Sopenharmony_ci	pie_process_dequeue(skb, &q->p_params, &flow->vars, flow->backlog);
2818c2ecf20Sopenharmony_ci	return skb;
2828c2ecf20Sopenharmony_ci}
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_cistatic int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
2858c2ecf20Sopenharmony_ci			 struct netlink_ext_ack *extack)
2868c2ecf20Sopenharmony_ci{
2878c2ecf20Sopenharmony_ci	struct fq_pie_sched_data *q = qdisc_priv(sch);
2888c2ecf20Sopenharmony_ci	struct nlattr *tb[TCA_FQ_PIE_MAX + 1];
2898c2ecf20Sopenharmony_ci	unsigned int len_dropped = 0;
2908c2ecf20Sopenharmony_ci	unsigned int num_dropped = 0;
2918c2ecf20Sopenharmony_ci	int err;
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci	if (!opt)
2948c2ecf20Sopenharmony_ci		return -EINVAL;
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci	err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack);
2978c2ecf20Sopenharmony_ci	if (err < 0)
2988c2ecf20Sopenharmony_ci		return err;
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci	sch_tree_lock(sch);
3018c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_LIMIT]) {
3028c2ecf20Sopenharmony_ci		u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]);
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci		q->p_params.limit = limit;
3058c2ecf20Sopenharmony_ci		sch->limit = limit;
3068c2ecf20Sopenharmony_ci	}
3078c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_FLOWS]) {
3088c2ecf20Sopenharmony_ci		if (q->flows) {
3098c2ecf20Sopenharmony_ci			NL_SET_ERR_MSG_MOD(extack,
3108c2ecf20Sopenharmony_ci					   "Number of flows cannot be changed");
3118c2ecf20Sopenharmony_ci			goto flow_error;
3128c2ecf20Sopenharmony_ci		}
3138c2ecf20Sopenharmony_ci		q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]);
3148c2ecf20Sopenharmony_ci		if (!q->flows_cnt || q->flows_cnt > 65536) {
3158c2ecf20Sopenharmony_ci			NL_SET_ERR_MSG_MOD(extack,
3168c2ecf20Sopenharmony_ci					   "Number of flows must range in [1..65536]");
3178c2ecf20Sopenharmony_ci			goto flow_error;
3188c2ecf20Sopenharmony_ci		}
3198c2ecf20Sopenharmony_ci	}
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	/* convert from microseconds to pschedtime */
3228c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_TARGET]) {
3238c2ecf20Sopenharmony_ci		/* target is in us */
3248c2ecf20Sopenharmony_ci		u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]);
3258c2ecf20Sopenharmony_ci
3268c2ecf20Sopenharmony_ci		/* convert to pschedtime */
3278c2ecf20Sopenharmony_ci		q->p_params.target =
3288c2ecf20Sopenharmony_ci			PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
3298c2ecf20Sopenharmony_ci	}
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci	/* tupdate is in jiffies */
3328c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_TUPDATE])
3338c2ecf20Sopenharmony_ci		q->p_params.tupdate =
3348c2ecf20Sopenharmony_ci			usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]));
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_ALPHA])
3378c2ecf20Sopenharmony_ci		q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]);
3388c2ecf20Sopenharmony_ci
3398c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_BETA])
3408c2ecf20Sopenharmony_ci		q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]);
3418c2ecf20Sopenharmony_ci
3428c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_QUANTUM])
3438c2ecf20Sopenharmony_ci		q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]);
3448c2ecf20Sopenharmony_ci
3458c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_MEMORY_LIMIT])
3468c2ecf20Sopenharmony_ci		q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]);
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_ECN_PROB])
3498c2ecf20Sopenharmony_ci		q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]);
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_ECN])
3528c2ecf20Sopenharmony_ci		q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]);
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_BYTEMODE])
3558c2ecf20Sopenharmony_ci		q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]);
3568c2ecf20Sopenharmony_ci
3578c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR])
3588c2ecf20Sopenharmony_ci		q->p_params.dq_rate_estimator =
3598c2ecf20Sopenharmony_ci			nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]);
3608c2ecf20Sopenharmony_ci
3618c2ecf20Sopenharmony_ci	/* Drop excess packets if new limit is lower */
3628c2ecf20Sopenharmony_ci	while (sch->q.qlen > sch->limit) {
3638c2ecf20Sopenharmony_ci		struct sk_buff *skb = fq_pie_qdisc_dequeue(sch);
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_ci		len_dropped += qdisc_pkt_len(skb);
3668c2ecf20Sopenharmony_ci		num_dropped += 1;
3678c2ecf20Sopenharmony_ci		rtnl_kfree_skbs(skb, skb);
3688c2ecf20Sopenharmony_ci	}
3698c2ecf20Sopenharmony_ci	qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped);
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci	sch_tree_unlock(sch);
3728c2ecf20Sopenharmony_ci	return 0;
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ciflow_error:
3758c2ecf20Sopenharmony_ci	sch_tree_unlock(sch);
3768c2ecf20Sopenharmony_ci	return -EINVAL;
3778c2ecf20Sopenharmony_ci}
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_cistatic void fq_pie_timer(struct timer_list *t)
3808c2ecf20Sopenharmony_ci{
3818c2ecf20Sopenharmony_ci	struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
3828c2ecf20Sopenharmony_ci	unsigned long next, tupdate;
3838c2ecf20Sopenharmony_ci	struct Qdisc *sch = q->sch;
3848c2ecf20Sopenharmony_ci	spinlock_t *root_lock; /* to lock qdisc for probability calculations */
3858c2ecf20Sopenharmony_ci	int max_cnt, i;
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_ci	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
3888c2ecf20Sopenharmony_ci	spin_lock(root_lock);
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	/* Limit this expensive loop to 2048 flows per round. */
3918c2ecf20Sopenharmony_ci	max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048);
3928c2ecf20Sopenharmony_ci	for (i = 0; i < max_cnt; i++) {
3938c2ecf20Sopenharmony_ci		pie_calculate_probability(&q->p_params,
3948c2ecf20Sopenharmony_ci					  &q->flows[q->flows_cursor].vars,
3958c2ecf20Sopenharmony_ci					  q->flows[q->flows_cursor].backlog);
3968c2ecf20Sopenharmony_ci		q->flows_cursor++;
3978c2ecf20Sopenharmony_ci	}
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	tupdate = q->p_params.tupdate;
4008c2ecf20Sopenharmony_ci	next = 0;
4018c2ecf20Sopenharmony_ci	if (q->flows_cursor >= q->flows_cnt) {
4028c2ecf20Sopenharmony_ci		q->flows_cursor = 0;
4038c2ecf20Sopenharmony_ci		next = tupdate;
4048c2ecf20Sopenharmony_ci	}
4058c2ecf20Sopenharmony_ci	if (tupdate)
4068c2ecf20Sopenharmony_ci		mod_timer(&q->adapt_timer, jiffies + next);
4078c2ecf20Sopenharmony_ci	spin_unlock(root_lock);
4088c2ecf20Sopenharmony_ci}
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_cistatic int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
4118c2ecf20Sopenharmony_ci		       struct netlink_ext_ack *extack)
4128c2ecf20Sopenharmony_ci{
4138c2ecf20Sopenharmony_ci	struct fq_pie_sched_data *q = qdisc_priv(sch);
4148c2ecf20Sopenharmony_ci	int err;
4158c2ecf20Sopenharmony_ci	u32 idx;
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci	pie_params_init(&q->p_params);
4188c2ecf20Sopenharmony_ci	sch->limit = 10 * 1024;
4198c2ecf20Sopenharmony_ci	q->p_params.limit = sch->limit;
4208c2ecf20Sopenharmony_ci	q->quantum = psched_mtu(qdisc_dev(sch));
4218c2ecf20Sopenharmony_ci	q->sch = sch;
4228c2ecf20Sopenharmony_ci	q->ecn_prob = 10;
4238c2ecf20Sopenharmony_ci	q->flows_cnt = 1024;
4248c2ecf20Sopenharmony_ci	q->memory_limit = SZ_32M;
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&q->new_flows);
4278c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&q->old_flows);
4288c2ecf20Sopenharmony_ci	timer_setup(&q->adapt_timer, fq_pie_timer, 0);
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	if (opt) {
4318c2ecf20Sopenharmony_ci		err = fq_pie_change(sch, opt, extack);
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci		if (err)
4348c2ecf20Sopenharmony_ci			return err;
4358c2ecf20Sopenharmony_ci	}
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_ci	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
4388c2ecf20Sopenharmony_ci	if (err)
4398c2ecf20Sopenharmony_ci		goto init_failure;
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_ci	q->flows = kvcalloc(q->flows_cnt, sizeof(struct fq_pie_flow),
4428c2ecf20Sopenharmony_ci			    GFP_KERNEL);
4438c2ecf20Sopenharmony_ci	if (!q->flows) {
4448c2ecf20Sopenharmony_ci		err = -ENOMEM;
4458c2ecf20Sopenharmony_ci		goto init_failure;
4468c2ecf20Sopenharmony_ci	}
4478c2ecf20Sopenharmony_ci	for (idx = 0; idx < q->flows_cnt; idx++) {
4488c2ecf20Sopenharmony_ci		struct fq_pie_flow *flow = q->flows + idx;
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&flow->flowchain);
4518c2ecf20Sopenharmony_ci		pie_vars_init(&flow->vars);
4528c2ecf20Sopenharmony_ci	}
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	mod_timer(&q->adapt_timer, jiffies + HZ / 2);
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_ci	return 0;
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_ciinit_failure:
4598c2ecf20Sopenharmony_ci	q->flows_cnt = 0;
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci	return err;
4628c2ecf20Sopenharmony_ci}
4638c2ecf20Sopenharmony_ci
4648c2ecf20Sopenharmony_cistatic int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
4658c2ecf20Sopenharmony_ci{
4668c2ecf20Sopenharmony_ci	struct fq_pie_sched_data *q = qdisc_priv(sch);
4678c2ecf20Sopenharmony_ci	struct nlattr *opts;
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	opts = nla_nest_start(skb, TCA_OPTIONS);
4708c2ecf20Sopenharmony_ci	if (!opts)
4718c2ecf20Sopenharmony_ci		return -EMSGSIZE;
4728c2ecf20Sopenharmony_ci
4738c2ecf20Sopenharmony_ci	/* convert target from pschedtime to us */
4748c2ecf20Sopenharmony_ci	if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
4758c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||
4768c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_TARGET,
4778c2ecf20Sopenharmony_ci			((u32)PSCHED_TICKS2NS(q->p_params.target)) /
4788c2ecf20Sopenharmony_ci			NSEC_PER_USEC) ||
4798c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_TUPDATE,
4808c2ecf20Sopenharmony_ci			jiffies_to_usecs(q->p_params.tupdate)) ||
4818c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) ||
4828c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) ||
4838c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) ||
4848c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) ||
4858c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) ||
4868c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) ||
4878c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) ||
4888c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
4898c2ecf20Sopenharmony_ci			q->p_params.dq_rate_estimator))
4908c2ecf20Sopenharmony_ci		goto nla_put_failure;
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci	return nla_nest_end(skb, opts);
4938c2ecf20Sopenharmony_ci
4948c2ecf20Sopenharmony_cinla_put_failure:
4958c2ecf20Sopenharmony_ci	nla_nest_cancel(skb, opts);
4968c2ecf20Sopenharmony_ci	return -EMSGSIZE;
4978c2ecf20Sopenharmony_ci}
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_cistatic int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
5008c2ecf20Sopenharmony_ci{
5018c2ecf20Sopenharmony_ci	struct fq_pie_sched_data *q = qdisc_priv(sch);
5028c2ecf20Sopenharmony_ci	struct tc_fq_pie_xstats st = {
5038c2ecf20Sopenharmony_ci		.packets_in	= q->stats.packets_in,
5048c2ecf20Sopenharmony_ci		.overlimit	= q->stats.overlimit,
5058c2ecf20Sopenharmony_ci		.overmemory	= q->overmemory,
5068c2ecf20Sopenharmony_ci		.dropped	= q->stats.dropped,
5078c2ecf20Sopenharmony_ci		.ecn_mark	= q->stats.ecn_mark,
5088c2ecf20Sopenharmony_ci		.new_flow_count = q->new_flow_count,
5098c2ecf20Sopenharmony_ci		.memory_usage   = q->memory_usage,
5108c2ecf20Sopenharmony_ci	};
5118c2ecf20Sopenharmony_ci	struct list_head *pos;
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	sch_tree_lock(sch);
5148c2ecf20Sopenharmony_ci	list_for_each(pos, &q->new_flows)
5158c2ecf20Sopenharmony_ci		st.new_flows_len++;
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci	list_for_each(pos, &q->old_flows)
5188c2ecf20Sopenharmony_ci		st.old_flows_len++;
5198c2ecf20Sopenharmony_ci	sch_tree_unlock(sch);
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	return gnet_stats_copy_app(d, &st, sizeof(st));
5228c2ecf20Sopenharmony_ci}
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_cistatic void fq_pie_reset(struct Qdisc *sch)
5258c2ecf20Sopenharmony_ci{
5268c2ecf20Sopenharmony_ci	struct fq_pie_sched_data *q = qdisc_priv(sch);
5278c2ecf20Sopenharmony_ci	u32 idx;
5288c2ecf20Sopenharmony_ci
5298c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&q->new_flows);
5308c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&q->old_flows);
5318c2ecf20Sopenharmony_ci	for (idx = 0; idx < q->flows_cnt; idx++) {
5328c2ecf20Sopenharmony_ci		struct fq_pie_flow *flow = q->flows + idx;
5338c2ecf20Sopenharmony_ci
5348c2ecf20Sopenharmony_ci		/* Removes all packets from flow */
5358c2ecf20Sopenharmony_ci		rtnl_kfree_skbs(flow->head, flow->tail);
5368c2ecf20Sopenharmony_ci		flow->head = NULL;
5378c2ecf20Sopenharmony_ci
5388c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&flow->flowchain);
5398c2ecf20Sopenharmony_ci		pie_vars_init(&flow->vars);
5408c2ecf20Sopenharmony_ci	}
5418c2ecf20Sopenharmony_ci}
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_cistatic void fq_pie_destroy(struct Qdisc *sch)
5448c2ecf20Sopenharmony_ci{
5458c2ecf20Sopenharmony_ci	struct fq_pie_sched_data *q = qdisc_priv(sch);
5468c2ecf20Sopenharmony_ci
5478c2ecf20Sopenharmony_ci	tcf_block_put(q->block);
5488c2ecf20Sopenharmony_ci	q->p_params.tupdate = 0;
5498c2ecf20Sopenharmony_ci	del_timer_sync(&q->adapt_timer);
5508c2ecf20Sopenharmony_ci	kvfree(q->flows);
5518c2ecf20Sopenharmony_ci}
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_cistatic struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = {
5548c2ecf20Sopenharmony_ci	.id		= "fq_pie",
5558c2ecf20Sopenharmony_ci	.priv_size	= sizeof(struct fq_pie_sched_data),
5568c2ecf20Sopenharmony_ci	.enqueue	= fq_pie_qdisc_enqueue,
5578c2ecf20Sopenharmony_ci	.dequeue	= fq_pie_qdisc_dequeue,
5588c2ecf20Sopenharmony_ci	.peek		= qdisc_peek_dequeued,
5598c2ecf20Sopenharmony_ci	.init		= fq_pie_init,
5608c2ecf20Sopenharmony_ci	.destroy	= fq_pie_destroy,
5618c2ecf20Sopenharmony_ci	.reset		= fq_pie_reset,
5628c2ecf20Sopenharmony_ci	.change		= fq_pie_change,
5638c2ecf20Sopenharmony_ci	.dump		= fq_pie_dump,
5648c2ecf20Sopenharmony_ci	.dump_stats	= fq_pie_dump_stats,
5658c2ecf20Sopenharmony_ci	.owner		= THIS_MODULE,
5668c2ecf20Sopenharmony_ci};
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_cistatic int __init fq_pie_module_init(void)
5698c2ecf20Sopenharmony_ci{
5708c2ecf20Sopenharmony_ci	return register_qdisc(&fq_pie_qdisc_ops);
5718c2ecf20Sopenharmony_ci}
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_cistatic void __exit fq_pie_module_exit(void)
5748c2ecf20Sopenharmony_ci{
5758c2ecf20Sopenharmony_ci	unregister_qdisc(&fq_pie_qdisc_ops);
5768c2ecf20Sopenharmony_ci}
5778c2ecf20Sopenharmony_ci
5788c2ecf20Sopenharmony_cimodule_init(fq_pie_module_init);
5798c2ecf20Sopenharmony_cimodule_exit(fq_pie_module_exit);
5808c2ecf20Sopenharmony_ci
5818c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Flow Queue Proportional Integral controller Enhanced (FQ-PIE)");
5828c2ecf20Sopenharmony_ciMODULE_AUTHOR("Mohit P. Tahiliani");
5838c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
584