18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * net/sched/sch_qfq.c         Quick Fair Queueing Plus Scheduler.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente.
68c2ecf20Sopenharmony_ci * Copyright (c) 2012 Paolo Valente.
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include <linux/module.h>
108c2ecf20Sopenharmony_ci#include <linux/init.h>
118c2ecf20Sopenharmony_ci#include <linux/bitops.h>
128c2ecf20Sopenharmony_ci#include <linux/errno.h>
138c2ecf20Sopenharmony_ci#include <linux/netdevice.h>
148c2ecf20Sopenharmony_ci#include <linux/pkt_sched.h>
158c2ecf20Sopenharmony_ci#include <net/sch_generic.h>
168c2ecf20Sopenharmony_ci#include <net/pkt_sched.h>
178c2ecf20Sopenharmony_ci#include <net/pkt_cls.h>
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci
208c2ecf20Sopenharmony_ci/*  Quick Fair Queueing Plus
218c2ecf20Sopenharmony_ci    ========================
228c2ecf20Sopenharmony_ci
238c2ecf20Sopenharmony_ci    Sources:
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci    [1] Paolo Valente,
268c2ecf20Sopenharmony_ci    "Reducing the Execution Time of Fair-Queueing Schedulers."
278c2ecf20Sopenharmony_ci    http://algo.ing.unimo.it/people/paolo/agg-sched/agg-sched.pdf
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci    Sources for QFQ:
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci    [2] Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient
328c2ecf20Sopenharmony_ci    Packet Scheduling with Tight Bandwidth Distribution Guarantees."
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_ci    See also:
358c2ecf20Sopenharmony_ci    http://retis.sssup.it/~fabio/linux/qfq/
368c2ecf20Sopenharmony_ci */
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci/*
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci  QFQ+ divides classes into aggregates of at most MAX_AGG_CLASSES
418c2ecf20Sopenharmony_ci  classes. Each aggregate is timestamped with a virtual start time S
428c2ecf20Sopenharmony_ci  and a virtual finish time F, and scheduled according to its
438c2ecf20Sopenharmony_ci  timestamps. S and F are computed as a function of a system virtual
448c2ecf20Sopenharmony_ci  time function V. The classes within each aggregate are instead
458c2ecf20Sopenharmony_ci  scheduled with DRR.
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci  To speed up operations, QFQ+ divides also aggregates into a limited
488c2ecf20Sopenharmony_ci  number of groups. Which group a class belongs to depends on the
498c2ecf20Sopenharmony_ci  ratio between the maximum packet length for the class and the weight
508c2ecf20Sopenharmony_ci  of the class. Groups have their own S and F. In the end, QFQ+
518c2ecf20Sopenharmony_ci  schedules groups, then aggregates within groups, then classes within
528c2ecf20Sopenharmony_ci  aggregates. See [1] and [2] for a full description.
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci  Virtual time computations.
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci  S, F and V are all computed in fixed point arithmetic with
578c2ecf20Sopenharmony_ci  FRAC_BITS decimal bits.
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci  QFQ_MAX_INDEX is the maximum index allowed for a group. We need
608c2ecf20Sopenharmony_ci	one bit per index.
618c2ecf20Sopenharmony_ci  QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci  The layout of the bits is as below:
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci                   [ MTU_SHIFT ][      FRAC_BITS    ]
668c2ecf20Sopenharmony_ci                   [ MAX_INDEX    ][ MIN_SLOT_SHIFT ]
678c2ecf20Sopenharmony_ci				 ^.__grp->index = 0
688c2ecf20Sopenharmony_ci				 *.__grp->slot_shift
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci  where MIN_SLOT_SHIFT is derived by difference from the others.
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci  The max group index corresponds to Lmax/w_min, where
738c2ecf20Sopenharmony_ci  Lmax=1<<MTU_SHIFT, w_min = 1 .
748c2ecf20Sopenharmony_ci  From this, and knowing how many groups (MAX_INDEX) we want,
758c2ecf20Sopenharmony_ci  we can derive the shift corresponding to each group.
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci  Because we often need to compute
788c2ecf20Sopenharmony_ci	F = S + len/w_i  and V = V + len/wsum
798c2ecf20Sopenharmony_ci  instead of storing w_i store the value
808c2ecf20Sopenharmony_ci	inv_w = (1<<FRAC_BITS)/w_i
818c2ecf20Sopenharmony_ci  so we can do F = S + len * inv_w * wsum.
828c2ecf20Sopenharmony_ci  We use W_TOT in the formulas so we can easily move between
838c2ecf20Sopenharmony_ci  static and adaptive weight sum.
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci  The per-scheduler-instance data contain all the data structures
868c2ecf20Sopenharmony_ci  for the scheduler: bitmaps and bucket lists.
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci */
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci/*
918c2ecf20Sopenharmony_ci * Maximum number of consecutive slots occupied by backlogged classes
928c2ecf20Sopenharmony_ci * inside a group.
938c2ecf20Sopenharmony_ci */
948c2ecf20Sopenharmony_ci#define QFQ_MAX_SLOTS	32
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci/*
978c2ecf20Sopenharmony_ci * Shifts used for aggregate<->group mapping.  We allow class weights that are
988c2ecf20Sopenharmony_ci * in the range [1, 2^MAX_WSHIFT], and we try to map each aggregate i to the
998c2ecf20Sopenharmony_ci * group with the smallest index that can support the L_i / r_i configured
1008c2ecf20Sopenharmony_ci * for the classes in the aggregate.
1018c2ecf20Sopenharmony_ci *
1028c2ecf20Sopenharmony_ci * grp->index is the index of the group; and grp->slot_shift
1038c2ecf20Sopenharmony_ci * is the shift for the corresponding (scaled) sigma_i.
1048c2ecf20Sopenharmony_ci */
1058c2ecf20Sopenharmony_ci#define QFQ_MAX_INDEX		24
1068c2ecf20Sopenharmony_ci#define QFQ_MAX_WSHIFT		10
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci#define	QFQ_MAX_WEIGHT		(1<<QFQ_MAX_WSHIFT) /* see qfq_slot_insert */
1098c2ecf20Sopenharmony_ci#define QFQ_MAX_WSUM		(64*QFQ_MAX_WEIGHT)
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci#define FRAC_BITS		30	/* fixed point arithmetic */
1128c2ecf20Sopenharmony_ci#define ONE_FP			(1UL << FRAC_BITS)
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci#define QFQ_MTU_SHIFT		16	/* to support TSO/GSO */
1158c2ecf20Sopenharmony_ci#define QFQ_MIN_LMAX		512	/* see qfq_slot_insert */
1168c2ecf20Sopenharmony_ci#define QFQ_MAX_LMAX		(1UL << QFQ_MTU_SHIFT)
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci#define QFQ_MAX_AGG_CLASSES	8 /* max num classes per aggregate allowed */
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci/*
1218c2ecf20Sopenharmony_ci * Possible group states.  These values are used as indexes for the bitmaps
1228c2ecf20Sopenharmony_ci * array of struct qfq_queue.
1238c2ecf20Sopenharmony_ci */
1248c2ecf20Sopenharmony_cienum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_cistruct qfq_group;
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_cistruct qfq_aggregate;
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_cistruct qfq_class {
1318c2ecf20Sopenharmony_ci	struct Qdisc_class_common common;
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci	unsigned int filter_cnt;
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	struct gnet_stats_basic_packed bstats;
1368c2ecf20Sopenharmony_ci	struct gnet_stats_queue qstats;
1378c2ecf20Sopenharmony_ci	struct net_rate_estimator __rcu *rate_est;
1388c2ecf20Sopenharmony_ci	struct Qdisc *qdisc;
1398c2ecf20Sopenharmony_ci	struct list_head alist;		/* Link for active-classes list. */
1408c2ecf20Sopenharmony_ci	struct qfq_aggregate *agg;	/* Parent aggregate. */
1418c2ecf20Sopenharmony_ci	int deficit;			/* DRR deficit counter. */
1428c2ecf20Sopenharmony_ci};
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_cistruct qfq_aggregate {
1458c2ecf20Sopenharmony_ci	struct hlist_node next;	/* Link for the slot list. */
1468c2ecf20Sopenharmony_ci	u64 S, F;		/* flow timestamps (exact) */
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_ci	/* group we belong to. In principle we would need the index,
1498c2ecf20Sopenharmony_ci	 * which is log_2(lmax/weight), but we never reference it
1508c2ecf20Sopenharmony_ci	 * directly, only the group.
1518c2ecf20Sopenharmony_ci	 */
1528c2ecf20Sopenharmony_ci	struct qfq_group *grp;
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci	/* these are copied from the flowset. */
1558c2ecf20Sopenharmony_ci	u32	class_weight; /* Weight of each class in this aggregate. */
1568c2ecf20Sopenharmony_ci	/* Max pkt size for the classes in this aggregate, DRR quantum. */
1578c2ecf20Sopenharmony_ci	int	lmax;
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci	u32	inv_w;	    /* ONE_FP/(sum of weights of classes in aggr.). */
1608c2ecf20Sopenharmony_ci	u32	budgetmax;  /* Max budget for this aggregate. */
1618c2ecf20Sopenharmony_ci	u32	initial_budget, budget;     /* Initial and current budget. */
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	int		  num_classes;	/* Number of classes in this aggr. */
1648c2ecf20Sopenharmony_ci	struct list_head  active;	/* DRR queue of active classes. */
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci	struct hlist_node nonfull_next;	/* See nonfull_aggs in qfq_sched. */
1678c2ecf20Sopenharmony_ci};
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_cistruct qfq_group {
1708c2ecf20Sopenharmony_ci	u64 S, F;			/* group timestamps (approx). */
1718c2ecf20Sopenharmony_ci	unsigned int slot_shift;	/* Slot shift. */
1728c2ecf20Sopenharmony_ci	unsigned int index;		/* Group index. */
1738c2ecf20Sopenharmony_ci	unsigned int front;		/* Index of the front slot. */
1748c2ecf20Sopenharmony_ci	unsigned long full_slots;	/* non-empty slots */
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	/* Array of RR lists of active aggregates. */
1778c2ecf20Sopenharmony_ci	struct hlist_head slots[QFQ_MAX_SLOTS];
1788c2ecf20Sopenharmony_ci};
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_cistruct qfq_sched {
1818c2ecf20Sopenharmony_ci	struct tcf_proto __rcu *filter_list;
1828c2ecf20Sopenharmony_ci	struct tcf_block	*block;
1838c2ecf20Sopenharmony_ci	struct Qdisc_class_hash clhash;
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci	u64			oldV, V;	/* Precise virtual times. */
1868c2ecf20Sopenharmony_ci	struct qfq_aggregate	*in_serv_agg;   /* Aggregate being served. */
1878c2ecf20Sopenharmony_ci	u32			wsum;		/* weight sum */
1888c2ecf20Sopenharmony_ci	u32			iwsum;		/* inverse weight sum */
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	unsigned long bitmaps[QFQ_MAX_STATE];	    /* Group bitmaps. */
1918c2ecf20Sopenharmony_ci	struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
1928c2ecf20Sopenharmony_ci	u32 min_slot_shift;	/* Index of the group-0 bit in the bitmaps. */
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	u32 max_agg_classes;		/* Max number of classes per aggr. */
1958c2ecf20Sopenharmony_ci	struct hlist_head nonfull_aggs; /* Aggs with room for more classes. */
1968c2ecf20Sopenharmony_ci};
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci/*
1998c2ecf20Sopenharmony_ci * Possible reasons why the timestamps of an aggregate are updated
2008c2ecf20Sopenharmony_ci * enqueue: the aggregate switches from idle to active and must scheduled
2018c2ecf20Sopenharmony_ci *	    for service
2028c2ecf20Sopenharmony_ci * requeue: the aggregate finishes its budget, so it stops being served and
2038c2ecf20Sopenharmony_ci *	    must be rescheduled for service
2048c2ecf20Sopenharmony_ci */
2058c2ecf20Sopenharmony_cienum update_reason {enqueue, requeue};
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_cistatic struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
2088c2ecf20Sopenharmony_ci{
2098c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
2108c2ecf20Sopenharmony_ci	struct Qdisc_class_common *clc;
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci	clc = qdisc_class_find(&q->clhash, classid);
2138c2ecf20Sopenharmony_ci	if (clc == NULL)
2148c2ecf20Sopenharmony_ci		return NULL;
2158c2ecf20Sopenharmony_ci	return container_of(clc, struct qfq_class, common);
2168c2ecf20Sopenharmony_ci}
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_cistatic struct netlink_range_validation lmax_range = {
2198c2ecf20Sopenharmony_ci	.min = QFQ_MIN_LMAX,
2208c2ecf20Sopenharmony_ci	.max = QFQ_MAX_LMAX,
2218c2ecf20Sopenharmony_ci};
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_cistatic const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
2248c2ecf20Sopenharmony_ci	[TCA_QFQ_WEIGHT] = NLA_POLICY_RANGE(NLA_U32, 1, QFQ_MAX_WEIGHT),
2258c2ecf20Sopenharmony_ci	[TCA_QFQ_LMAX] = NLA_POLICY_FULL_RANGE(NLA_U32, &lmax_range),
2268c2ecf20Sopenharmony_ci};
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci/*
2298c2ecf20Sopenharmony_ci * Calculate a flow index, given its weight and maximum packet length.
2308c2ecf20Sopenharmony_ci * index = log_2(maxlen/weight) but we need to apply the scaling.
2318c2ecf20Sopenharmony_ci * This is used only once at flow creation.
2328c2ecf20Sopenharmony_ci */
2338c2ecf20Sopenharmony_cistatic int qfq_calc_index(u32 inv_w, unsigned int maxlen, u32 min_slot_shift)
2348c2ecf20Sopenharmony_ci{
2358c2ecf20Sopenharmony_ci	u64 slot_size = (u64)maxlen * inv_w;
2368c2ecf20Sopenharmony_ci	unsigned long size_map;
2378c2ecf20Sopenharmony_ci	int index = 0;
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	size_map = slot_size >> min_slot_shift;
2408c2ecf20Sopenharmony_ci	if (!size_map)
2418c2ecf20Sopenharmony_ci		goto out;
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_ci	index = __fls(size_map) + 1;	/* basically a log_2 */
2448c2ecf20Sopenharmony_ci	index -= !(slot_size - (1ULL << (index + min_slot_shift - 1)));
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci	if (index < 0)
2478c2ecf20Sopenharmony_ci		index = 0;
2488c2ecf20Sopenharmony_ciout:
2498c2ecf20Sopenharmony_ci	pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
2508c2ecf20Sopenharmony_ci		 (unsigned long) ONE_FP/inv_w, maxlen, index);
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_ci	return index;
2538c2ecf20Sopenharmony_ci}
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_cistatic void qfq_deactivate_agg(struct qfq_sched *, struct qfq_aggregate *);
2568c2ecf20Sopenharmony_cistatic void qfq_activate_agg(struct qfq_sched *, struct qfq_aggregate *,
2578c2ecf20Sopenharmony_ci			     enum update_reason);
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_cistatic void qfq_init_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
2608c2ecf20Sopenharmony_ci			 u32 lmax, u32 weight)
2618c2ecf20Sopenharmony_ci{
2628c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&agg->active);
2638c2ecf20Sopenharmony_ci	hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci	agg->lmax = lmax;
2668c2ecf20Sopenharmony_ci	agg->class_weight = weight;
2678c2ecf20Sopenharmony_ci}
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_cistatic struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
2708c2ecf20Sopenharmony_ci					  u32 lmax, u32 weight)
2718c2ecf20Sopenharmony_ci{
2728c2ecf20Sopenharmony_ci	struct qfq_aggregate *agg;
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci	hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
2758c2ecf20Sopenharmony_ci		if (agg->lmax == lmax && agg->class_weight == weight)
2768c2ecf20Sopenharmony_ci			return agg;
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ci	return NULL;
2798c2ecf20Sopenharmony_ci}
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_ci/* Update aggregate as a function of the new number of classes. */
2838c2ecf20Sopenharmony_cistatic void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
2848c2ecf20Sopenharmony_ci			   int new_num_classes)
2858c2ecf20Sopenharmony_ci{
2868c2ecf20Sopenharmony_ci	u32 new_agg_weight;
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci	if (new_num_classes == q->max_agg_classes)
2898c2ecf20Sopenharmony_ci		hlist_del_init(&agg->nonfull_next);
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	if (agg->num_classes > new_num_classes &&
2928c2ecf20Sopenharmony_ci	    new_num_classes == q->max_agg_classes - 1) /* agg no more full */
2938c2ecf20Sopenharmony_ci		hlist_add_head(&agg->nonfull_next, &q->nonfull_aggs);
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci	/* The next assignment may let
2968c2ecf20Sopenharmony_ci	 * agg->initial_budget > agg->budgetmax
2978c2ecf20Sopenharmony_ci	 * hold, we will take it into account in charge_actual_service().
2988c2ecf20Sopenharmony_ci	 */
2998c2ecf20Sopenharmony_ci	agg->budgetmax = new_num_classes * agg->lmax;
3008c2ecf20Sopenharmony_ci	new_agg_weight = agg->class_weight * new_num_classes;
3018c2ecf20Sopenharmony_ci	agg->inv_w = ONE_FP/new_agg_weight;
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci	if (agg->grp == NULL) {
3048c2ecf20Sopenharmony_ci		int i = qfq_calc_index(agg->inv_w, agg->budgetmax,
3058c2ecf20Sopenharmony_ci				       q->min_slot_shift);
3068c2ecf20Sopenharmony_ci		agg->grp = &q->groups[i];
3078c2ecf20Sopenharmony_ci	}
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci	q->wsum +=
3108c2ecf20Sopenharmony_ci		(int) agg->class_weight * (new_num_classes - agg->num_classes);
3118c2ecf20Sopenharmony_ci	q->iwsum = ONE_FP / q->wsum;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci	agg->num_classes = new_num_classes;
3148c2ecf20Sopenharmony_ci}
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci/* Add class to aggregate. */
3178c2ecf20Sopenharmony_cistatic void qfq_add_to_agg(struct qfq_sched *q,
3188c2ecf20Sopenharmony_ci			   struct qfq_aggregate *agg,
3198c2ecf20Sopenharmony_ci			   struct qfq_class *cl)
3208c2ecf20Sopenharmony_ci{
3218c2ecf20Sopenharmony_ci	cl->agg = agg;
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci	qfq_update_agg(q, agg, agg->num_classes+1);
3248c2ecf20Sopenharmony_ci	if (cl->qdisc->q.qlen > 0) { /* adding an active class */
3258c2ecf20Sopenharmony_ci		list_add_tail(&cl->alist, &agg->active);
3268c2ecf20Sopenharmony_ci		if (list_first_entry(&agg->active, struct qfq_class, alist) ==
3278c2ecf20Sopenharmony_ci		    cl && q->in_serv_agg != agg) /* agg was inactive */
3288c2ecf20Sopenharmony_ci			qfq_activate_agg(q, agg, enqueue); /* schedule agg */
3298c2ecf20Sopenharmony_ci	}
3308c2ecf20Sopenharmony_ci}
3318c2ecf20Sopenharmony_ci
3328c2ecf20Sopenharmony_cistatic struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *);
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_cistatic void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
3358c2ecf20Sopenharmony_ci{
3368c2ecf20Sopenharmony_ci	hlist_del_init(&agg->nonfull_next);
3378c2ecf20Sopenharmony_ci	q->wsum -= agg->class_weight;
3388c2ecf20Sopenharmony_ci	if (q->wsum != 0)
3398c2ecf20Sopenharmony_ci		q->iwsum = ONE_FP / q->wsum;
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	if (q->in_serv_agg == agg)
3428c2ecf20Sopenharmony_ci		q->in_serv_agg = qfq_choose_next_agg(q);
3438c2ecf20Sopenharmony_ci	kfree(agg);
3448c2ecf20Sopenharmony_ci}
3458c2ecf20Sopenharmony_ci
3468c2ecf20Sopenharmony_ci/* Deschedule class from within its parent aggregate. */
3478c2ecf20Sopenharmony_cistatic void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
3488c2ecf20Sopenharmony_ci{
3498c2ecf20Sopenharmony_ci	struct qfq_aggregate *agg = cl->agg;
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci	list_del(&cl->alist); /* remove from RR queue of the aggregate */
3538c2ecf20Sopenharmony_ci	if (list_empty(&agg->active)) /* agg is now inactive */
3548c2ecf20Sopenharmony_ci		qfq_deactivate_agg(q, agg);
3558c2ecf20Sopenharmony_ci}
3568c2ecf20Sopenharmony_ci
3578c2ecf20Sopenharmony_ci/* Remove class from its parent aggregate. */
3588c2ecf20Sopenharmony_cistatic void qfq_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
3598c2ecf20Sopenharmony_ci{
3608c2ecf20Sopenharmony_ci	struct qfq_aggregate *agg = cl->agg;
3618c2ecf20Sopenharmony_ci
3628c2ecf20Sopenharmony_ci	cl->agg = NULL;
3638c2ecf20Sopenharmony_ci	if (agg->num_classes == 1) { /* agg being emptied, destroy it */
3648c2ecf20Sopenharmony_ci		qfq_destroy_agg(q, agg);
3658c2ecf20Sopenharmony_ci		return;
3668c2ecf20Sopenharmony_ci	}
3678c2ecf20Sopenharmony_ci	qfq_update_agg(q, agg, agg->num_classes-1);
3688c2ecf20Sopenharmony_ci}
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci/* Deschedule class and remove it from its parent aggregate. */
3718c2ecf20Sopenharmony_cistatic void qfq_deact_rm_from_agg(struct qfq_sched *q, struct qfq_class *cl)
3728c2ecf20Sopenharmony_ci{
3738c2ecf20Sopenharmony_ci	if (cl->qdisc->q.qlen > 0) /* class is active */
3748c2ecf20Sopenharmony_ci		qfq_deactivate_class(q, cl);
3758c2ecf20Sopenharmony_ci
3768c2ecf20Sopenharmony_ci	qfq_rm_from_agg(q, cl);
3778c2ecf20Sopenharmony_ci}
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_ci/* Move class to a new aggregate, matching the new class weight and/or lmax */
3808c2ecf20Sopenharmony_cistatic int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
3818c2ecf20Sopenharmony_ci			   u32 lmax)
3828c2ecf20Sopenharmony_ci{
3838c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
3848c2ecf20Sopenharmony_ci	struct qfq_aggregate *new_agg;
3858c2ecf20Sopenharmony_ci
3868c2ecf20Sopenharmony_ci	/* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */
3878c2ecf20Sopenharmony_ci	if (lmax > QFQ_MAX_LMAX)
3888c2ecf20Sopenharmony_ci		return -EINVAL;
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	new_agg = qfq_find_agg(q, lmax, weight);
3918c2ecf20Sopenharmony_ci	if (new_agg == NULL) { /* create new aggregate */
3928c2ecf20Sopenharmony_ci		new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
3938c2ecf20Sopenharmony_ci		if (new_agg == NULL)
3948c2ecf20Sopenharmony_ci			return -ENOBUFS;
3958c2ecf20Sopenharmony_ci		qfq_init_agg(q, new_agg, lmax, weight);
3968c2ecf20Sopenharmony_ci	}
3978c2ecf20Sopenharmony_ci	qfq_deact_rm_from_agg(q, cl);
3988c2ecf20Sopenharmony_ci	qfq_add_to_agg(q, new_agg, cl);
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	return 0;
4018c2ecf20Sopenharmony_ci}
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_cistatic int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
4048c2ecf20Sopenharmony_ci			    struct nlattr **tca, unsigned long *arg,
4058c2ecf20Sopenharmony_ci			    struct netlink_ext_ack *extack)
4068c2ecf20Sopenharmony_ci{
4078c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
4088c2ecf20Sopenharmony_ci	struct qfq_class *cl = (struct qfq_class *)*arg;
4098c2ecf20Sopenharmony_ci	bool existing = false;
4108c2ecf20Sopenharmony_ci	struct nlattr *tb[TCA_QFQ_MAX + 1];
4118c2ecf20Sopenharmony_ci	struct qfq_aggregate *new_agg = NULL;
4128c2ecf20Sopenharmony_ci	u32 weight, lmax, inv_w;
4138c2ecf20Sopenharmony_ci	int err;
4148c2ecf20Sopenharmony_ci	int delta_w;
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_ci	if (tca[TCA_OPTIONS] == NULL) {
4178c2ecf20Sopenharmony_ci		pr_notice("qfq: no options\n");
4188c2ecf20Sopenharmony_ci		return -EINVAL;
4198c2ecf20Sopenharmony_ci	}
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_ci	err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS],
4228c2ecf20Sopenharmony_ci					  qfq_policy, extack);
4238c2ecf20Sopenharmony_ci	if (err < 0)
4248c2ecf20Sopenharmony_ci		return err;
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	if (tb[TCA_QFQ_WEIGHT])
4278c2ecf20Sopenharmony_ci		weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
4288c2ecf20Sopenharmony_ci	else
4298c2ecf20Sopenharmony_ci		weight = 1;
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci	if (tb[TCA_QFQ_LMAX]) {
4328c2ecf20Sopenharmony_ci		lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
4338c2ecf20Sopenharmony_ci	} else {
4348c2ecf20Sopenharmony_ci		/* MTU size is user controlled */
4358c2ecf20Sopenharmony_ci		lmax = psched_mtu(qdisc_dev(sch));
4368c2ecf20Sopenharmony_ci		if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) {
4378c2ecf20Sopenharmony_ci			NL_SET_ERR_MSG_MOD(extack,
4388c2ecf20Sopenharmony_ci					   "MTU size out of bounds for qfq");
4398c2ecf20Sopenharmony_ci			return -EINVAL;
4408c2ecf20Sopenharmony_ci		}
4418c2ecf20Sopenharmony_ci	}
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_ci	inv_w = ONE_FP / weight;
4448c2ecf20Sopenharmony_ci	weight = ONE_FP / inv_w;
4458c2ecf20Sopenharmony_ci
4468c2ecf20Sopenharmony_ci	if (cl != NULL &&
4478c2ecf20Sopenharmony_ci	    lmax == cl->agg->lmax &&
4488c2ecf20Sopenharmony_ci	    weight == cl->agg->class_weight)
4498c2ecf20Sopenharmony_ci		return 0; /* nothing to change */
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	delta_w = weight - (cl ? cl->agg->class_weight : 0);
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci	if (q->wsum + delta_w > QFQ_MAX_WSUM) {
4548c2ecf20Sopenharmony_ci		pr_notice("qfq: total weight out of range (%d + %u)\n",
4558c2ecf20Sopenharmony_ci			  delta_w, q->wsum);
4568c2ecf20Sopenharmony_ci		return -EINVAL;
4578c2ecf20Sopenharmony_ci	}
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci	if (cl != NULL) { /* modify existing class */
4608c2ecf20Sopenharmony_ci		if (tca[TCA_RATE]) {
4618c2ecf20Sopenharmony_ci			err = gen_replace_estimator(&cl->bstats, NULL,
4628c2ecf20Sopenharmony_ci						    &cl->rate_est,
4638c2ecf20Sopenharmony_ci						    NULL,
4648c2ecf20Sopenharmony_ci						    qdisc_root_sleeping_running(sch),
4658c2ecf20Sopenharmony_ci						    tca[TCA_RATE]);
4668c2ecf20Sopenharmony_ci			if (err)
4678c2ecf20Sopenharmony_ci				return err;
4688c2ecf20Sopenharmony_ci		}
4698c2ecf20Sopenharmony_ci		existing = true;
4708c2ecf20Sopenharmony_ci		goto set_change_agg;
4718c2ecf20Sopenharmony_ci	}
4728c2ecf20Sopenharmony_ci
4738c2ecf20Sopenharmony_ci	/* create and init new class */
4748c2ecf20Sopenharmony_ci	cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
4758c2ecf20Sopenharmony_ci	if (cl == NULL)
4768c2ecf20Sopenharmony_ci		return -ENOBUFS;
4778c2ecf20Sopenharmony_ci
4788c2ecf20Sopenharmony_ci	cl->common.classid = classid;
4798c2ecf20Sopenharmony_ci	cl->deficit = lmax;
4808c2ecf20Sopenharmony_ci
4818c2ecf20Sopenharmony_ci	cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
4828c2ecf20Sopenharmony_ci				      classid, NULL);
4838c2ecf20Sopenharmony_ci	if (cl->qdisc == NULL)
4848c2ecf20Sopenharmony_ci		cl->qdisc = &noop_qdisc;
4858c2ecf20Sopenharmony_ci
4868c2ecf20Sopenharmony_ci	if (tca[TCA_RATE]) {
4878c2ecf20Sopenharmony_ci		err = gen_new_estimator(&cl->bstats, NULL,
4888c2ecf20Sopenharmony_ci					&cl->rate_est,
4898c2ecf20Sopenharmony_ci					NULL,
4908c2ecf20Sopenharmony_ci					qdisc_root_sleeping_running(sch),
4918c2ecf20Sopenharmony_ci					tca[TCA_RATE]);
4928c2ecf20Sopenharmony_ci		if (err)
4938c2ecf20Sopenharmony_ci			goto destroy_class;
4948c2ecf20Sopenharmony_ci	}
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	if (cl->qdisc != &noop_qdisc)
4978c2ecf20Sopenharmony_ci		qdisc_hash_add(cl->qdisc, true);
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_ciset_change_agg:
5008c2ecf20Sopenharmony_ci	sch_tree_lock(sch);
5018c2ecf20Sopenharmony_ci	new_agg = qfq_find_agg(q, lmax, weight);
5028c2ecf20Sopenharmony_ci	if (new_agg == NULL) { /* create new aggregate */
5038c2ecf20Sopenharmony_ci		sch_tree_unlock(sch);
5048c2ecf20Sopenharmony_ci		new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL);
5058c2ecf20Sopenharmony_ci		if (new_agg == NULL) {
5068c2ecf20Sopenharmony_ci			err = -ENOBUFS;
5078c2ecf20Sopenharmony_ci			gen_kill_estimator(&cl->rate_est);
5088c2ecf20Sopenharmony_ci			goto destroy_class;
5098c2ecf20Sopenharmony_ci		}
5108c2ecf20Sopenharmony_ci		sch_tree_lock(sch);
5118c2ecf20Sopenharmony_ci		qfq_init_agg(q, new_agg, lmax, weight);
5128c2ecf20Sopenharmony_ci	}
5138c2ecf20Sopenharmony_ci	if (existing)
5148c2ecf20Sopenharmony_ci		qfq_deact_rm_from_agg(q, cl);
5158c2ecf20Sopenharmony_ci	else
5168c2ecf20Sopenharmony_ci		qdisc_class_hash_insert(&q->clhash, &cl->common);
5178c2ecf20Sopenharmony_ci	qfq_add_to_agg(q, new_agg, cl);
5188c2ecf20Sopenharmony_ci	sch_tree_unlock(sch);
5198c2ecf20Sopenharmony_ci	qdisc_class_hash_grow(sch, &q->clhash);
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	*arg = (unsigned long)cl;
5228c2ecf20Sopenharmony_ci	return 0;
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_cidestroy_class:
5258c2ecf20Sopenharmony_ci	qdisc_put(cl->qdisc);
5268c2ecf20Sopenharmony_ci	kfree(cl);
5278c2ecf20Sopenharmony_ci	return err;
5288c2ecf20Sopenharmony_ci}
5298c2ecf20Sopenharmony_ci
5308c2ecf20Sopenharmony_cistatic void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
5318c2ecf20Sopenharmony_ci{
5328c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
5338c2ecf20Sopenharmony_ci
5348c2ecf20Sopenharmony_ci	qfq_rm_from_agg(q, cl);
5358c2ecf20Sopenharmony_ci	gen_kill_estimator(&cl->rate_est);
5368c2ecf20Sopenharmony_ci	qdisc_put(cl->qdisc);
5378c2ecf20Sopenharmony_ci	kfree(cl);
5388c2ecf20Sopenharmony_ci}
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_cistatic int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
5418c2ecf20Sopenharmony_ci{
5428c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
5438c2ecf20Sopenharmony_ci	struct qfq_class *cl = (struct qfq_class *)arg;
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci	if (cl->filter_cnt > 0)
5468c2ecf20Sopenharmony_ci		return -EBUSY;
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_ci	sch_tree_lock(sch);
5498c2ecf20Sopenharmony_ci
5508c2ecf20Sopenharmony_ci	qdisc_purge_queue(cl->qdisc);
5518c2ecf20Sopenharmony_ci	qdisc_class_hash_remove(&q->clhash, &cl->common);
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci	sch_tree_unlock(sch);
5548c2ecf20Sopenharmony_ci
5558c2ecf20Sopenharmony_ci	qfq_destroy_class(sch, cl);
5568c2ecf20Sopenharmony_ci	return 0;
5578c2ecf20Sopenharmony_ci}
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_cistatic unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
5608c2ecf20Sopenharmony_ci{
5618c2ecf20Sopenharmony_ci	return (unsigned long)qfq_find_class(sch, classid);
5628c2ecf20Sopenharmony_ci}
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_cistatic struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl,
5658c2ecf20Sopenharmony_ci				       struct netlink_ext_ack *extack)
5668c2ecf20Sopenharmony_ci{
5678c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
5688c2ecf20Sopenharmony_ci
5698c2ecf20Sopenharmony_ci	if (cl)
5708c2ecf20Sopenharmony_ci		return NULL;
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_ci	return q->block;
5738c2ecf20Sopenharmony_ci}
5748c2ecf20Sopenharmony_ci
5758c2ecf20Sopenharmony_cistatic unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
5768c2ecf20Sopenharmony_ci				  u32 classid)
5778c2ecf20Sopenharmony_ci{
5788c2ecf20Sopenharmony_ci	struct qfq_class *cl = qfq_find_class(sch, classid);
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_ci	if (cl != NULL)
5818c2ecf20Sopenharmony_ci		cl->filter_cnt++;
5828c2ecf20Sopenharmony_ci
5838c2ecf20Sopenharmony_ci	return (unsigned long)cl;
5848c2ecf20Sopenharmony_ci}
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_cistatic void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
5878c2ecf20Sopenharmony_ci{
5888c2ecf20Sopenharmony_ci	struct qfq_class *cl = (struct qfq_class *)arg;
5898c2ecf20Sopenharmony_ci
5908c2ecf20Sopenharmony_ci	cl->filter_cnt--;
5918c2ecf20Sopenharmony_ci}
5928c2ecf20Sopenharmony_ci
5938c2ecf20Sopenharmony_cistatic int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
5948c2ecf20Sopenharmony_ci			   struct Qdisc *new, struct Qdisc **old,
5958c2ecf20Sopenharmony_ci			   struct netlink_ext_ack *extack)
5968c2ecf20Sopenharmony_ci{
5978c2ecf20Sopenharmony_ci	struct qfq_class *cl = (struct qfq_class *)arg;
5988c2ecf20Sopenharmony_ci
5998c2ecf20Sopenharmony_ci	if (new == NULL) {
6008c2ecf20Sopenharmony_ci		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
6018c2ecf20Sopenharmony_ci					cl->common.classid, NULL);
6028c2ecf20Sopenharmony_ci		if (new == NULL)
6038c2ecf20Sopenharmony_ci			new = &noop_qdisc;
6048c2ecf20Sopenharmony_ci	}
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_ci	*old = qdisc_replace(sch, new, &cl->qdisc);
6078c2ecf20Sopenharmony_ci	return 0;
6088c2ecf20Sopenharmony_ci}
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_cistatic struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
6118c2ecf20Sopenharmony_ci{
6128c2ecf20Sopenharmony_ci	struct qfq_class *cl = (struct qfq_class *)arg;
6138c2ecf20Sopenharmony_ci
6148c2ecf20Sopenharmony_ci	return cl->qdisc;
6158c2ecf20Sopenharmony_ci}
6168c2ecf20Sopenharmony_ci
6178c2ecf20Sopenharmony_cistatic int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
6188c2ecf20Sopenharmony_ci			  struct sk_buff *skb, struct tcmsg *tcm)
6198c2ecf20Sopenharmony_ci{
6208c2ecf20Sopenharmony_ci	struct qfq_class *cl = (struct qfq_class *)arg;
6218c2ecf20Sopenharmony_ci	struct nlattr *nest;
6228c2ecf20Sopenharmony_ci
6238c2ecf20Sopenharmony_ci	tcm->tcm_parent	= TC_H_ROOT;
6248c2ecf20Sopenharmony_ci	tcm->tcm_handle	= cl->common.classid;
6258c2ecf20Sopenharmony_ci	tcm->tcm_info	= cl->qdisc->handle;
6268c2ecf20Sopenharmony_ci
6278c2ecf20Sopenharmony_ci	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
6288c2ecf20Sopenharmony_ci	if (nest == NULL)
6298c2ecf20Sopenharmony_ci		goto nla_put_failure;
6308c2ecf20Sopenharmony_ci	if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
6318c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
6328c2ecf20Sopenharmony_ci		goto nla_put_failure;
6338c2ecf20Sopenharmony_ci	return nla_nest_end(skb, nest);
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_cinla_put_failure:
6368c2ecf20Sopenharmony_ci	nla_nest_cancel(skb, nest);
6378c2ecf20Sopenharmony_ci	return -EMSGSIZE;
6388c2ecf20Sopenharmony_ci}
6398c2ecf20Sopenharmony_ci
6408c2ecf20Sopenharmony_cistatic int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
6418c2ecf20Sopenharmony_ci				struct gnet_dump *d)
6428c2ecf20Sopenharmony_ci{
6438c2ecf20Sopenharmony_ci	struct qfq_class *cl = (struct qfq_class *)arg;
6448c2ecf20Sopenharmony_ci	struct tc_qfq_stats xstats;
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_ci	memset(&xstats, 0, sizeof(xstats));
6478c2ecf20Sopenharmony_ci
6488c2ecf20Sopenharmony_ci	xstats.weight = cl->agg->class_weight;
6498c2ecf20Sopenharmony_ci	xstats.lmax = cl->agg->lmax;
6508c2ecf20Sopenharmony_ci
6518c2ecf20Sopenharmony_ci	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
6528c2ecf20Sopenharmony_ci				  d, NULL, &cl->bstats) < 0 ||
6538c2ecf20Sopenharmony_ci	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
6548c2ecf20Sopenharmony_ci	    qdisc_qstats_copy(d, cl->qdisc) < 0)
6558c2ecf20Sopenharmony_ci		return -1;
6568c2ecf20Sopenharmony_ci
6578c2ecf20Sopenharmony_ci	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
6588c2ecf20Sopenharmony_ci}
6598c2ecf20Sopenharmony_ci
6608c2ecf20Sopenharmony_cistatic void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
6618c2ecf20Sopenharmony_ci{
6628c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
6638c2ecf20Sopenharmony_ci	struct qfq_class *cl;
6648c2ecf20Sopenharmony_ci	unsigned int i;
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_ci	if (arg->stop)
6678c2ecf20Sopenharmony_ci		return;
6688c2ecf20Sopenharmony_ci
6698c2ecf20Sopenharmony_ci	for (i = 0; i < q->clhash.hashsize; i++) {
6708c2ecf20Sopenharmony_ci		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
6718c2ecf20Sopenharmony_ci			if (arg->count < arg->skip) {
6728c2ecf20Sopenharmony_ci				arg->count++;
6738c2ecf20Sopenharmony_ci				continue;
6748c2ecf20Sopenharmony_ci			}
6758c2ecf20Sopenharmony_ci			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
6768c2ecf20Sopenharmony_ci				arg->stop = 1;
6778c2ecf20Sopenharmony_ci				return;
6788c2ecf20Sopenharmony_ci			}
6798c2ecf20Sopenharmony_ci			arg->count++;
6808c2ecf20Sopenharmony_ci		}
6818c2ecf20Sopenharmony_ci	}
6828c2ecf20Sopenharmony_ci}
6838c2ecf20Sopenharmony_ci
6848c2ecf20Sopenharmony_cistatic struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
6858c2ecf20Sopenharmony_ci				      int *qerr)
6868c2ecf20Sopenharmony_ci{
6878c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
6888c2ecf20Sopenharmony_ci	struct qfq_class *cl;
6898c2ecf20Sopenharmony_ci	struct tcf_result res;
6908c2ecf20Sopenharmony_ci	struct tcf_proto *fl;
6918c2ecf20Sopenharmony_ci	int result;
6928c2ecf20Sopenharmony_ci
6938c2ecf20Sopenharmony_ci	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
6948c2ecf20Sopenharmony_ci		pr_debug("qfq_classify: found %d\n", skb->priority);
6958c2ecf20Sopenharmony_ci		cl = qfq_find_class(sch, skb->priority);
6968c2ecf20Sopenharmony_ci		if (cl != NULL)
6978c2ecf20Sopenharmony_ci			return cl;
6988c2ecf20Sopenharmony_ci	}
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
7018c2ecf20Sopenharmony_ci	fl = rcu_dereference_bh(q->filter_list);
7028c2ecf20Sopenharmony_ci	result = tcf_classify(skb, fl, &res, false);
7038c2ecf20Sopenharmony_ci	if (result >= 0) {
7048c2ecf20Sopenharmony_ci#ifdef CONFIG_NET_CLS_ACT
7058c2ecf20Sopenharmony_ci		switch (result) {
7068c2ecf20Sopenharmony_ci		case TC_ACT_QUEUED:
7078c2ecf20Sopenharmony_ci		case TC_ACT_STOLEN:
7088c2ecf20Sopenharmony_ci		case TC_ACT_TRAP:
7098c2ecf20Sopenharmony_ci			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
7108c2ecf20Sopenharmony_ci			fallthrough;
7118c2ecf20Sopenharmony_ci		case TC_ACT_SHOT:
7128c2ecf20Sopenharmony_ci			return NULL;
7138c2ecf20Sopenharmony_ci		}
7148c2ecf20Sopenharmony_ci#endif
7158c2ecf20Sopenharmony_ci		cl = (struct qfq_class *)res.class;
7168c2ecf20Sopenharmony_ci		if (cl == NULL)
7178c2ecf20Sopenharmony_ci			cl = qfq_find_class(sch, res.classid);
7188c2ecf20Sopenharmony_ci		return cl;
7198c2ecf20Sopenharmony_ci	}
7208c2ecf20Sopenharmony_ci
7218c2ecf20Sopenharmony_ci	return NULL;
7228c2ecf20Sopenharmony_ci}
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci/* Generic comparison function, handling wraparound. */
7258c2ecf20Sopenharmony_cistatic inline int qfq_gt(u64 a, u64 b)
7268c2ecf20Sopenharmony_ci{
7278c2ecf20Sopenharmony_ci	return (s64)(a - b) > 0;
7288c2ecf20Sopenharmony_ci}
7298c2ecf20Sopenharmony_ci
7308c2ecf20Sopenharmony_ci/* Round a precise timestamp to its slotted value. */
7318c2ecf20Sopenharmony_cistatic inline u64 qfq_round_down(u64 ts, unsigned int shift)
7328c2ecf20Sopenharmony_ci{
7338c2ecf20Sopenharmony_ci	return ts & ~((1ULL << shift) - 1);
7348c2ecf20Sopenharmony_ci}
7358c2ecf20Sopenharmony_ci
7368c2ecf20Sopenharmony_ci/* return the pointer to the group with lowest index in the bitmap */
7378c2ecf20Sopenharmony_cistatic inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
7388c2ecf20Sopenharmony_ci					unsigned long bitmap)
7398c2ecf20Sopenharmony_ci{
7408c2ecf20Sopenharmony_ci	int index = __ffs(bitmap);
7418c2ecf20Sopenharmony_ci	return &q->groups[index];
7428c2ecf20Sopenharmony_ci}
7438c2ecf20Sopenharmony_ci/* Calculate a mask to mimic what would be ffs_from(). */
7448c2ecf20Sopenharmony_cistatic inline unsigned long mask_from(unsigned long bitmap, int from)
7458c2ecf20Sopenharmony_ci{
7468c2ecf20Sopenharmony_ci	return bitmap & ~((1UL << from) - 1);
7478c2ecf20Sopenharmony_ci}
7488c2ecf20Sopenharmony_ci
7498c2ecf20Sopenharmony_ci/*
7508c2ecf20Sopenharmony_ci * The state computation relies on ER=0, IR=1, EB=2, IB=3
7518c2ecf20Sopenharmony_ci * First compute eligibility comparing grp->S, q->V,
7528c2ecf20Sopenharmony_ci * then check if someone is blocking us and possibly add EB
7538c2ecf20Sopenharmony_ci */
7548c2ecf20Sopenharmony_cistatic int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
7558c2ecf20Sopenharmony_ci{
7568c2ecf20Sopenharmony_ci	/* if S > V we are not eligible */
7578c2ecf20Sopenharmony_ci	unsigned int state = qfq_gt(grp->S, q->V);
7588c2ecf20Sopenharmony_ci	unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
7598c2ecf20Sopenharmony_ci	struct qfq_group *next;
7608c2ecf20Sopenharmony_ci
7618c2ecf20Sopenharmony_ci	if (mask) {
7628c2ecf20Sopenharmony_ci		next = qfq_ffs(q, mask);
7638c2ecf20Sopenharmony_ci		if (qfq_gt(grp->F, next->F))
7648c2ecf20Sopenharmony_ci			state |= EB;
7658c2ecf20Sopenharmony_ci	}
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ci	return state;
7688c2ecf20Sopenharmony_ci}
7698c2ecf20Sopenharmony_ci
7708c2ecf20Sopenharmony_ci
7718c2ecf20Sopenharmony_ci/*
7728c2ecf20Sopenharmony_ci * In principle
7738c2ecf20Sopenharmony_ci *	q->bitmaps[dst] |= q->bitmaps[src] & mask;
7748c2ecf20Sopenharmony_ci *	q->bitmaps[src] &= ~mask;
7758c2ecf20Sopenharmony_ci * but we should make sure that src != dst
7768c2ecf20Sopenharmony_ci */
7778c2ecf20Sopenharmony_cistatic inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
7788c2ecf20Sopenharmony_ci				   int src, int dst)
7798c2ecf20Sopenharmony_ci{
7808c2ecf20Sopenharmony_ci	q->bitmaps[dst] |= q->bitmaps[src] & mask;
7818c2ecf20Sopenharmony_ci	q->bitmaps[src] &= ~mask;
7828c2ecf20Sopenharmony_ci}
7838c2ecf20Sopenharmony_ci
7848c2ecf20Sopenharmony_cistatic void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
7858c2ecf20Sopenharmony_ci{
7868c2ecf20Sopenharmony_ci	unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
7878c2ecf20Sopenharmony_ci	struct qfq_group *next;
7888c2ecf20Sopenharmony_ci
7898c2ecf20Sopenharmony_ci	if (mask) {
7908c2ecf20Sopenharmony_ci		next = qfq_ffs(q, mask);
7918c2ecf20Sopenharmony_ci		if (!qfq_gt(next->F, old_F))
7928c2ecf20Sopenharmony_ci			return;
7938c2ecf20Sopenharmony_ci	}
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_ci	mask = (1UL << index) - 1;
7968c2ecf20Sopenharmony_ci	qfq_move_groups(q, mask, EB, ER);
7978c2ecf20Sopenharmony_ci	qfq_move_groups(q, mask, IB, IR);
7988c2ecf20Sopenharmony_ci}
7998c2ecf20Sopenharmony_ci
8008c2ecf20Sopenharmony_ci/*
8018c2ecf20Sopenharmony_ci * perhaps
8028c2ecf20Sopenharmony_ci *
8038c2ecf20Sopenharmony_ci	old_V ^= q->V;
8048c2ecf20Sopenharmony_ci	old_V >>= q->min_slot_shift;
8058c2ecf20Sopenharmony_ci	if (old_V) {
8068c2ecf20Sopenharmony_ci		...
8078c2ecf20Sopenharmony_ci	}
8088c2ecf20Sopenharmony_ci *
8098c2ecf20Sopenharmony_ci */
8108c2ecf20Sopenharmony_cistatic void qfq_make_eligible(struct qfq_sched *q)
8118c2ecf20Sopenharmony_ci{
8128c2ecf20Sopenharmony_ci	unsigned long vslot = q->V >> q->min_slot_shift;
8138c2ecf20Sopenharmony_ci	unsigned long old_vslot = q->oldV >> q->min_slot_shift;
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci	if (vslot != old_vslot) {
8168c2ecf20Sopenharmony_ci		unsigned long mask;
8178c2ecf20Sopenharmony_ci		int last_flip_pos = fls(vslot ^ old_vslot);
8188c2ecf20Sopenharmony_ci
8198c2ecf20Sopenharmony_ci		if (last_flip_pos > 31) /* higher than the number of groups */
8208c2ecf20Sopenharmony_ci			mask = ~0UL;    /* make all groups eligible */
8218c2ecf20Sopenharmony_ci		else
8228c2ecf20Sopenharmony_ci			mask = (1UL << last_flip_pos) - 1;
8238c2ecf20Sopenharmony_ci
8248c2ecf20Sopenharmony_ci		qfq_move_groups(q, mask, IR, ER);
8258c2ecf20Sopenharmony_ci		qfq_move_groups(q, mask, IB, EB);
8268c2ecf20Sopenharmony_ci	}
8278c2ecf20Sopenharmony_ci}
8288c2ecf20Sopenharmony_ci
8298c2ecf20Sopenharmony_ci/*
8308c2ecf20Sopenharmony_ci * The index of the slot in which the input aggregate agg is to be
8318c2ecf20Sopenharmony_ci * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2'
8328c2ecf20Sopenharmony_ci * and not a '-1' because the start time of the group may be moved
8338c2ecf20Sopenharmony_ci * backward by one slot after the aggregate has been inserted, and
8348c2ecf20Sopenharmony_ci * this would cause non-empty slots to be right-shifted by one
8358c2ecf20Sopenharmony_ci * position.
8368c2ecf20Sopenharmony_ci *
8378c2ecf20Sopenharmony_ci * QFQ+ fully satisfies this bound to the slot index if the parameters
8388c2ecf20Sopenharmony_ci * of the classes are not changed dynamically, and if QFQ+ never
8398c2ecf20Sopenharmony_ci * happens to postpone the service of agg unjustly, i.e., it never
8408c2ecf20Sopenharmony_ci * happens that the aggregate becomes backlogged and eligible, or just
8418c2ecf20Sopenharmony_ci * eligible, while an aggregate with a higher approximated finish time
8428c2ecf20Sopenharmony_ci * is being served. In particular, in this case QFQ+ guarantees that
8438c2ecf20Sopenharmony_ci * the timestamps of agg are low enough that the slot index is never
8448c2ecf20Sopenharmony_ci * higher than 2. Unfortunately, QFQ+ cannot provide the same
8458c2ecf20Sopenharmony_ci * guarantee if it happens to unjustly postpone the service of agg, or
8468c2ecf20Sopenharmony_ci * if the parameters of some class are changed.
8478c2ecf20Sopenharmony_ci *
8488c2ecf20Sopenharmony_ci * As for the first event, i.e., an out-of-order service, the
8498c2ecf20Sopenharmony_ci * upper bound to the slot index guaranteed by QFQ+ grows to
8508c2ecf20Sopenharmony_ci * 2 +
8518c2ecf20Sopenharmony_ci * QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) *
8528c2ecf20Sopenharmony_ci * (current_max_weight/current_wsum) <= 2 + 8 * 128 * 1.
8538c2ecf20Sopenharmony_ci *
8548c2ecf20Sopenharmony_ci * The following function deals with this problem by backward-shifting
8558c2ecf20Sopenharmony_ci * the timestamps of agg, if needed, so as to guarantee that the slot
8568c2ecf20Sopenharmony_ci * index is never higher than QFQ_MAX_SLOTS-2. This backward-shift may
8578c2ecf20Sopenharmony_ci * cause the service of other aggregates to be postponed, yet the
8588c2ecf20Sopenharmony_ci * worst-case guarantees of these aggregates are not violated.  In
8598c2ecf20Sopenharmony_ci * fact, in case of no out-of-order service, the timestamps of agg
8608c2ecf20Sopenharmony_ci * would have been even lower than they are after the backward shift,
8618c2ecf20Sopenharmony_ci * because QFQ+ would have guaranteed a maximum value equal to 2 for
8628c2ecf20Sopenharmony_ci * the slot index, and 2 < QFQ_MAX_SLOTS-2. Hence the aggregates whose
8638c2ecf20Sopenharmony_ci * service is postponed because of the backward-shift would have
8648c2ecf20Sopenharmony_ci * however waited for the service of agg before being served.
8658c2ecf20Sopenharmony_ci *
8668c2ecf20Sopenharmony_ci * The other event that may cause the slot index to be higher than 2
8678c2ecf20Sopenharmony_ci * for agg is a recent change of the parameters of some class. If the
8688c2ecf20Sopenharmony_ci * weight of a class is increased or the lmax (max_pkt_size) of the
8698c2ecf20Sopenharmony_ci * class is decreased, then a new aggregate with smaller slot size
8708c2ecf20Sopenharmony_ci * than the original parent aggregate of the class may happen to be
8718c2ecf20Sopenharmony_ci * activated. The activation of this aggregate should be properly
8728c2ecf20Sopenharmony_ci * delayed to when the service of the class has finished in the ideal
8738c2ecf20Sopenharmony_ci * system tracked by QFQ+. If the activation of the aggregate is not
8748c2ecf20Sopenharmony_ci * delayed to this reference time instant, then this aggregate may be
8758c2ecf20Sopenharmony_ci * unjustly served before other aggregates waiting for service. This
8768c2ecf20Sopenharmony_ci * may cause the above bound to the slot index to be violated for some
8778c2ecf20Sopenharmony_ci * of these unlucky aggregates.
8788c2ecf20Sopenharmony_ci *
8798c2ecf20Sopenharmony_ci * Instead of delaying the activation of the new aggregate, which is
8808c2ecf20Sopenharmony_ci * quite complex, the above-discussed capping of the slot index is
8818c2ecf20Sopenharmony_ci * used to handle also the consequences of a change of the parameters
8828c2ecf20Sopenharmony_ci * of a class.
8838c2ecf20Sopenharmony_ci */
8848c2ecf20Sopenharmony_cistatic void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
8858c2ecf20Sopenharmony_ci			    u64 roundedS)
8868c2ecf20Sopenharmony_ci{
8878c2ecf20Sopenharmony_ci	u64 slot = (roundedS - grp->S) >> grp->slot_shift;
8888c2ecf20Sopenharmony_ci	unsigned int i; /* slot index in the bucket list */
8898c2ecf20Sopenharmony_ci
8908c2ecf20Sopenharmony_ci	if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
8918c2ecf20Sopenharmony_ci		u64 deltaS = roundedS - grp->S -
8928c2ecf20Sopenharmony_ci			((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
8938c2ecf20Sopenharmony_ci		agg->S -= deltaS;
8948c2ecf20Sopenharmony_ci		agg->F -= deltaS;
8958c2ecf20Sopenharmony_ci		slot = QFQ_MAX_SLOTS - 2;
8968c2ecf20Sopenharmony_ci	}
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_ci	i = (grp->front + slot) % QFQ_MAX_SLOTS;
8998c2ecf20Sopenharmony_ci
9008c2ecf20Sopenharmony_ci	hlist_add_head(&agg->next, &grp->slots[i]);
9018c2ecf20Sopenharmony_ci	__set_bit(slot, &grp->full_slots);
9028c2ecf20Sopenharmony_ci}
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_ci/* Maybe introduce hlist_first_entry?? */
9058c2ecf20Sopenharmony_cistatic struct qfq_aggregate *qfq_slot_head(struct qfq_group *grp)
9068c2ecf20Sopenharmony_ci{
9078c2ecf20Sopenharmony_ci	return hlist_entry(grp->slots[grp->front].first,
9088c2ecf20Sopenharmony_ci			   struct qfq_aggregate, next);
9098c2ecf20Sopenharmony_ci}
9108c2ecf20Sopenharmony_ci
9118c2ecf20Sopenharmony_ci/*
9128c2ecf20Sopenharmony_ci * remove the entry from the slot
9138c2ecf20Sopenharmony_ci */
9148c2ecf20Sopenharmony_cistatic void qfq_front_slot_remove(struct qfq_group *grp)
9158c2ecf20Sopenharmony_ci{
9168c2ecf20Sopenharmony_ci	struct qfq_aggregate *agg = qfq_slot_head(grp);
9178c2ecf20Sopenharmony_ci
9188c2ecf20Sopenharmony_ci	BUG_ON(!agg);
9198c2ecf20Sopenharmony_ci	hlist_del(&agg->next);
9208c2ecf20Sopenharmony_ci	if (hlist_empty(&grp->slots[grp->front]))
9218c2ecf20Sopenharmony_ci		__clear_bit(0, &grp->full_slots);
9228c2ecf20Sopenharmony_ci}
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_ci/*
9258c2ecf20Sopenharmony_ci * Returns the first aggregate in the first non-empty bucket of the
9268c2ecf20Sopenharmony_ci * group. As a side effect, adjusts the bucket list so the first
9278c2ecf20Sopenharmony_ci * non-empty bucket is at position 0 in full_slots.
9288c2ecf20Sopenharmony_ci */
9298c2ecf20Sopenharmony_cistatic struct qfq_aggregate *qfq_slot_scan(struct qfq_group *grp)
9308c2ecf20Sopenharmony_ci{
9318c2ecf20Sopenharmony_ci	unsigned int i;
9328c2ecf20Sopenharmony_ci
9338c2ecf20Sopenharmony_ci	pr_debug("qfq slot_scan: grp %u full %#lx\n",
9348c2ecf20Sopenharmony_ci		 grp->index, grp->full_slots);
9358c2ecf20Sopenharmony_ci
9368c2ecf20Sopenharmony_ci	if (grp->full_slots == 0)
9378c2ecf20Sopenharmony_ci		return NULL;
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci	i = __ffs(grp->full_slots);  /* zero based */
9408c2ecf20Sopenharmony_ci	if (i > 0) {
9418c2ecf20Sopenharmony_ci		grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
9428c2ecf20Sopenharmony_ci		grp->full_slots >>= i;
9438c2ecf20Sopenharmony_ci	}
9448c2ecf20Sopenharmony_ci
9458c2ecf20Sopenharmony_ci	return qfq_slot_head(grp);
9468c2ecf20Sopenharmony_ci}
9478c2ecf20Sopenharmony_ci
9488c2ecf20Sopenharmony_ci/*
9498c2ecf20Sopenharmony_ci * adjust the bucket list. When the start time of a group decreases,
9508c2ecf20Sopenharmony_ci * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
9518c2ecf20Sopenharmony_ci * move the objects. The mask of occupied slots must be shifted
9528c2ecf20Sopenharmony_ci * because we use ffs() to find the first non-empty slot.
9538c2ecf20Sopenharmony_ci * This covers decreases in the group's start time, but what about
9548c2ecf20Sopenharmony_ci * increases of the start time ?
9558c2ecf20Sopenharmony_ci * Here too we should make sure that i is less than 32
9568c2ecf20Sopenharmony_ci */
9578c2ecf20Sopenharmony_cistatic void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
9588c2ecf20Sopenharmony_ci{
9598c2ecf20Sopenharmony_ci	unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
9608c2ecf20Sopenharmony_ci
9618c2ecf20Sopenharmony_ci	grp->full_slots <<= i;
9628c2ecf20Sopenharmony_ci	grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
9638c2ecf20Sopenharmony_ci}
9648c2ecf20Sopenharmony_ci
9658c2ecf20Sopenharmony_cistatic void qfq_update_eligible(struct qfq_sched *q)
9668c2ecf20Sopenharmony_ci{
9678c2ecf20Sopenharmony_ci	struct qfq_group *grp;
9688c2ecf20Sopenharmony_ci	unsigned long ineligible;
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_ci	ineligible = q->bitmaps[IR] | q->bitmaps[IB];
9718c2ecf20Sopenharmony_ci	if (ineligible) {
9728c2ecf20Sopenharmony_ci		if (!q->bitmaps[ER]) {
9738c2ecf20Sopenharmony_ci			grp = qfq_ffs(q, ineligible);
9748c2ecf20Sopenharmony_ci			if (qfq_gt(grp->S, q->V))
9758c2ecf20Sopenharmony_ci				q->V = grp->S;
9768c2ecf20Sopenharmony_ci		}
9778c2ecf20Sopenharmony_ci		qfq_make_eligible(q);
9788c2ecf20Sopenharmony_ci	}
9798c2ecf20Sopenharmony_ci}
9808c2ecf20Sopenharmony_ci
9818c2ecf20Sopenharmony_ci/* Dequeue head packet of the head class in the DRR queue of the aggregate. */
9828c2ecf20Sopenharmony_cistatic struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
9838c2ecf20Sopenharmony_ci				   struct qfq_class *cl, unsigned int len)
9848c2ecf20Sopenharmony_ci{
9858c2ecf20Sopenharmony_ci	struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
9868c2ecf20Sopenharmony_ci
9878c2ecf20Sopenharmony_ci	if (!skb)
9888c2ecf20Sopenharmony_ci		return NULL;
9898c2ecf20Sopenharmony_ci
9908c2ecf20Sopenharmony_ci	cl->deficit -= (int) len;
9918c2ecf20Sopenharmony_ci
9928c2ecf20Sopenharmony_ci	if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */
9938c2ecf20Sopenharmony_ci		list_del(&cl->alist);
9948c2ecf20Sopenharmony_ci	else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
9958c2ecf20Sopenharmony_ci		cl->deficit += agg->lmax;
9968c2ecf20Sopenharmony_ci		list_move_tail(&cl->alist, &agg->active);
9978c2ecf20Sopenharmony_ci	}
9988c2ecf20Sopenharmony_ci
9998c2ecf20Sopenharmony_ci	return skb;
10008c2ecf20Sopenharmony_ci}
10018c2ecf20Sopenharmony_ci
10028c2ecf20Sopenharmony_cistatic inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
10038c2ecf20Sopenharmony_ci					   struct qfq_class **cl,
10048c2ecf20Sopenharmony_ci					   unsigned int *len)
10058c2ecf20Sopenharmony_ci{
10068c2ecf20Sopenharmony_ci	struct sk_buff *skb;
10078c2ecf20Sopenharmony_ci
10088c2ecf20Sopenharmony_ci	*cl = list_first_entry(&agg->active, struct qfq_class, alist);
10098c2ecf20Sopenharmony_ci	skb = (*cl)->qdisc->ops->peek((*cl)->qdisc);
10108c2ecf20Sopenharmony_ci	if (skb == NULL)
10118c2ecf20Sopenharmony_ci		WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
10128c2ecf20Sopenharmony_ci	else
10138c2ecf20Sopenharmony_ci		*len = qdisc_pkt_len(skb);
10148c2ecf20Sopenharmony_ci
10158c2ecf20Sopenharmony_ci	return skb;
10168c2ecf20Sopenharmony_ci}
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci/* Update F according to the actual service received by the aggregate. */
10198c2ecf20Sopenharmony_cistatic inline void charge_actual_service(struct qfq_aggregate *agg)
10208c2ecf20Sopenharmony_ci{
10218c2ecf20Sopenharmony_ci	/* Compute the service received by the aggregate, taking into
10228c2ecf20Sopenharmony_ci	 * account that, after decreasing the number of classes in
10238c2ecf20Sopenharmony_ci	 * agg, it may happen that
10248c2ecf20Sopenharmony_ci	 * agg->initial_budget - agg->budget > agg->bugdetmax
10258c2ecf20Sopenharmony_ci	 */
10268c2ecf20Sopenharmony_ci	u32 service_received = min(agg->budgetmax,
10278c2ecf20Sopenharmony_ci				   agg->initial_budget - agg->budget);
10288c2ecf20Sopenharmony_ci
10298c2ecf20Sopenharmony_ci	agg->F = agg->S + (u64)service_received * agg->inv_w;
10308c2ecf20Sopenharmony_ci}
10318c2ecf20Sopenharmony_ci
10328c2ecf20Sopenharmony_ci/* Assign a reasonable start time for a new aggregate in group i.
10338c2ecf20Sopenharmony_ci * Admissible values for \hat(F) are multiples of \sigma_i
10348c2ecf20Sopenharmony_ci * no greater than V+\sigma_i . Larger values mean that
10358c2ecf20Sopenharmony_ci * we had a wraparound so we consider the timestamp to be stale.
10368c2ecf20Sopenharmony_ci *
10378c2ecf20Sopenharmony_ci * If F is not stale and F >= V then we set S = F.
10388c2ecf20Sopenharmony_ci * Otherwise we should assign S = V, but this may violate
10398c2ecf20Sopenharmony_ci * the ordering in EB (see [2]). So, if we have groups in ER,
10408c2ecf20Sopenharmony_ci * set S to the F_j of the first group j which would be blocking us.
10418c2ecf20Sopenharmony_ci * We are guaranteed not to move S backward because
10428c2ecf20Sopenharmony_ci * otherwise our group i would still be blocked.
10438c2ecf20Sopenharmony_ci */
10448c2ecf20Sopenharmony_cistatic void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg)
10458c2ecf20Sopenharmony_ci{
10468c2ecf20Sopenharmony_ci	unsigned long mask;
10478c2ecf20Sopenharmony_ci	u64 limit, roundedF;
10488c2ecf20Sopenharmony_ci	int slot_shift = agg->grp->slot_shift;
10498c2ecf20Sopenharmony_ci
10508c2ecf20Sopenharmony_ci	roundedF = qfq_round_down(agg->F, slot_shift);
10518c2ecf20Sopenharmony_ci	limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci	if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) {
10548c2ecf20Sopenharmony_ci		/* timestamp was stale */
10558c2ecf20Sopenharmony_ci		mask = mask_from(q->bitmaps[ER], agg->grp->index);
10568c2ecf20Sopenharmony_ci		if (mask) {
10578c2ecf20Sopenharmony_ci			struct qfq_group *next = qfq_ffs(q, mask);
10588c2ecf20Sopenharmony_ci			if (qfq_gt(roundedF, next->F)) {
10598c2ecf20Sopenharmony_ci				if (qfq_gt(limit, next->F))
10608c2ecf20Sopenharmony_ci					agg->S = next->F;
10618c2ecf20Sopenharmony_ci				else /* preserve timestamp correctness */
10628c2ecf20Sopenharmony_ci					agg->S = limit;
10638c2ecf20Sopenharmony_ci				return;
10648c2ecf20Sopenharmony_ci			}
10658c2ecf20Sopenharmony_ci		}
10668c2ecf20Sopenharmony_ci		agg->S = q->V;
10678c2ecf20Sopenharmony_ci	} else  /* timestamp is not stale */
10688c2ecf20Sopenharmony_ci		agg->S = agg->F;
10698c2ecf20Sopenharmony_ci}
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci/* Update the timestamps of agg before scheduling/rescheduling it for
10728c2ecf20Sopenharmony_ci * service.  In particular, assign to agg->F its maximum possible
10738c2ecf20Sopenharmony_ci * value, i.e., the virtual finish time with which the aggregate
10748c2ecf20Sopenharmony_ci * should be labeled if it used all its budget once in service.
10758c2ecf20Sopenharmony_ci */
10768c2ecf20Sopenharmony_cistatic inline void
10778c2ecf20Sopenharmony_ciqfq_update_agg_ts(struct qfq_sched *q,
10788c2ecf20Sopenharmony_ci		    struct qfq_aggregate *agg, enum update_reason reason)
10798c2ecf20Sopenharmony_ci{
10808c2ecf20Sopenharmony_ci	if (reason != requeue)
10818c2ecf20Sopenharmony_ci		qfq_update_start(q, agg);
10828c2ecf20Sopenharmony_ci	else /* just charge agg for the service received */
10838c2ecf20Sopenharmony_ci		agg->S = agg->F;
10848c2ecf20Sopenharmony_ci
10858c2ecf20Sopenharmony_ci	agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w;
10868c2ecf20Sopenharmony_ci}
10878c2ecf20Sopenharmony_ci
10888c2ecf20Sopenharmony_cistatic void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg);
10898c2ecf20Sopenharmony_ci
10908c2ecf20Sopenharmony_cistatic struct sk_buff *qfq_dequeue(struct Qdisc *sch)
10918c2ecf20Sopenharmony_ci{
10928c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
10938c2ecf20Sopenharmony_ci	struct qfq_aggregate *in_serv_agg = q->in_serv_agg;
10948c2ecf20Sopenharmony_ci	struct qfq_class *cl;
10958c2ecf20Sopenharmony_ci	struct sk_buff *skb = NULL;
10968c2ecf20Sopenharmony_ci	/* next-packet len, 0 means no more active classes in in-service agg */
10978c2ecf20Sopenharmony_ci	unsigned int len = 0;
10988c2ecf20Sopenharmony_ci
10998c2ecf20Sopenharmony_ci	if (in_serv_agg == NULL)
11008c2ecf20Sopenharmony_ci		return NULL;
11018c2ecf20Sopenharmony_ci
11028c2ecf20Sopenharmony_ci	if (!list_empty(&in_serv_agg->active))
11038c2ecf20Sopenharmony_ci		skb = qfq_peek_skb(in_serv_agg, &cl, &len);
11048c2ecf20Sopenharmony_ci
11058c2ecf20Sopenharmony_ci	/*
11068c2ecf20Sopenharmony_ci	 * If there are no active classes in the in-service aggregate,
11078c2ecf20Sopenharmony_ci	 * or if the aggregate has not enough budget to serve its next
11088c2ecf20Sopenharmony_ci	 * class, then choose the next aggregate to serve.
11098c2ecf20Sopenharmony_ci	 */
11108c2ecf20Sopenharmony_ci	if (len == 0 || in_serv_agg->budget < len) {
11118c2ecf20Sopenharmony_ci		charge_actual_service(in_serv_agg);
11128c2ecf20Sopenharmony_ci
11138c2ecf20Sopenharmony_ci		/* recharge the budget of the aggregate */
11148c2ecf20Sopenharmony_ci		in_serv_agg->initial_budget = in_serv_agg->budget =
11158c2ecf20Sopenharmony_ci			in_serv_agg->budgetmax;
11168c2ecf20Sopenharmony_ci
11178c2ecf20Sopenharmony_ci		if (!list_empty(&in_serv_agg->active)) {
11188c2ecf20Sopenharmony_ci			/*
11198c2ecf20Sopenharmony_ci			 * Still active: reschedule for
11208c2ecf20Sopenharmony_ci			 * service. Possible optimization: if no other
11218c2ecf20Sopenharmony_ci			 * aggregate is active, then there is no point
11228c2ecf20Sopenharmony_ci			 * in rescheduling this aggregate, and we can
11238c2ecf20Sopenharmony_ci			 * just keep it as the in-service one. This
11248c2ecf20Sopenharmony_ci			 * should be however a corner case, and to
11258c2ecf20Sopenharmony_ci			 * handle it, we would need to maintain an
11268c2ecf20Sopenharmony_ci			 * extra num_active_aggs field.
11278c2ecf20Sopenharmony_ci			*/
11288c2ecf20Sopenharmony_ci			qfq_update_agg_ts(q, in_serv_agg, requeue);
11298c2ecf20Sopenharmony_ci			qfq_schedule_agg(q, in_serv_agg);
11308c2ecf20Sopenharmony_ci		} else if (sch->q.qlen == 0) { /* no aggregate to serve */
11318c2ecf20Sopenharmony_ci			q->in_serv_agg = NULL;
11328c2ecf20Sopenharmony_ci			return NULL;
11338c2ecf20Sopenharmony_ci		}
11348c2ecf20Sopenharmony_ci
11358c2ecf20Sopenharmony_ci		/*
11368c2ecf20Sopenharmony_ci		 * If we get here, there are other aggregates queued:
11378c2ecf20Sopenharmony_ci		 * choose the new aggregate to serve.
11388c2ecf20Sopenharmony_ci		 */
11398c2ecf20Sopenharmony_ci		in_serv_agg = q->in_serv_agg = qfq_choose_next_agg(q);
11408c2ecf20Sopenharmony_ci		skb = qfq_peek_skb(in_serv_agg, &cl, &len);
11418c2ecf20Sopenharmony_ci	}
11428c2ecf20Sopenharmony_ci	if (!skb)
11438c2ecf20Sopenharmony_ci		return NULL;
11448c2ecf20Sopenharmony_ci
11458c2ecf20Sopenharmony_ci	sch->q.qlen--;
11468c2ecf20Sopenharmony_ci
11478c2ecf20Sopenharmony_ci	skb = agg_dequeue(in_serv_agg, cl, len);
11488c2ecf20Sopenharmony_ci
11498c2ecf20Sopenharmony_ci	if (!skb) {
11508c2ecf20Sopenharmony_ci		sch->q.qlen++;
11518c2ecf20Sopenharmony_ci		return NULL;
11528c2ecf20Sopenharmony_ci	}
11538c2ecf20Sopenharmony_ci
11548c2ecf20Sopenharmony_ci	qdisc_qstats_backlog_dec(sch, skb);
11558c2ecf20Sopenharmony_ci	qdisc_bstats_update(sch, skb);
11568c2ecf20Sopenharmony_ci
11578c2ecf20Sopenharmony_ci	/* If lmax is lowered, through qfq_change_class, for a class
11588c2ecf20Sopenharmony_ci	 * owning pending packets with larger size than the new value
11598c2ecf20Sopenharmony_ci	 * of lmax, then the following condition may hold.
11608c2ecf20Sopenharmony_ci	 */
11618c2ecf20Sopenharmony_ci	if (unlikely(in_serv_agg->budget < len))
11628c2ecf20Sopenharmony_ci		in_serv_agg->budget = 0;
11638c2ecf20Sopenharmony_ci	else
11648c2ecf20Sopenharmony_ci		in_serv_agg->budget -= len;
11658c2ecf20Sopenharmony_ci
11668c2ecf20Sopenharmony_ci	q->V += (u64)len * q->iwsum;
11678c2ecf20Sopenharmony_ci	pr_debug("qfq dequeue: len %u F %lld now %lld\n",
11688c2ecf20Sopenharmony_ci		 len, (unsigned long long) in_serv_agg->F,
11698c2ecf20Sopenharmony_ci		 (unsigned long long) q->V);
11708c2ecf20Sopenharmony_ci
11718c2ecf20Sopenharmony_ci	return skb;
11728c2ecf20Sopenharmony_ci}
11738c2ecf20Sopenharmony_ci
11748c2ecf20Sopenharmony_cistatic struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q)
11758c2ecf20Sopenharmony_ci{
11768c2ecf20Sopenharmony_ci	struct qfq_group *grp;
11778c2ecf20Sopenharmony_ci	struct qfq_aggregate *agg, *new_front_agg;
11788c2ecf20Sopenharmony_ci	u64 old_F;
11798c2ecf20Sopenharmony_ci
11808c2ecf20Sopenharmony_ci	qfq_update_eligible(q);
11818c2ecf20Sopenharmony_ci	q->oldV = q->V;
11828c2ecf20Sopenharmony_ci
11838c2ecf20Sopenharmony_ci	if (!q->bitmaps[ER])
11848c2ecf20Sopenharmony_ci		return NULL;
11858c2ecf20Sopenharmony_ci
11868c2ecf20Sopenharmony_ci	grp = qfq_ffs(q, q->bitmaps[ER]);
11878c2ecf20Sopenharmony_ci	old_F = grp->F;
11888c2ecf20Sopenharmony_ci
11898c2ecf20Sopenharmony_ci	agg = qfq_slot_head(grp);
11908c2ecf20Sopenharmony_ci
11918c2ecf20Sopenharmony_ci	/* agg starts to be served, remove it from schedule */
11928c2ecf20Sopenharmony_ci	qfq_front_slot_remove(grp);
11938c2ecf20Sopenharmony_ci
11948c2ecf20Sopenharmony_ci	new_front_agg = qfq_slot_scan(grp);
11958c2ecf20Sopenharmony_ci
11968c2ecf20Sopenharmony_ci	if (new_front_agg == NULL) /* group is now inactive, remove from ER */
11978c2ecf20Sopenharmony_ci		__clear_bit(grp->index, &q->bitmaps[ER]);
11988c2ecf20Sopenharmony_ci	else {
11998c2ecf20Sopenharmony_ci		u64 roundedS = qfq_round_down(new_front_agg->S,
12008c2ecf20Sopenharmony_ci					      grp->slot_shift);
12018c2ecf20Sopenharmony_ci		unsigned int s;
12028c2ecf20Sopenharmony_ci
12038c2ecf20Sopenharmony_ci		if (grp->S == roundedS)
12048c2ecf20Sopenharmony_ci			return agg;
12058c2ecf20Sopenharmony_ci		grp->S = roundedS;
12068c2ecf20Sopenharmony_ci		grp->F = roundedS + (2ULL << grp->slot_shift);
12078c2ecf20Sopenharmony_ci		__clear_bit(grp->index, &q->bitmaps[ER]);
12088c2ecf20Sopenharmony_ci		s = qfq_calc_state(q, grp);
12098c2ecf20Sopenharmony_ci		__set_bit(grp->index, &q->bitmaps[s]);
12108c2ecf20Sopenharmony_ci	}
12118c2ecf20Sopenharmony_ci
12128c2ecf20Sopenharmony_ci	qfq_unblock_groups(q, grp->index, old_F);
12138c2ecf20Sopenharmony_ci
12148c2ecf20Sopenharmony_ci	return agg;
12158c2ecf20Sopenharmony_ci}
12168c2ecf20Sopenharmony_ci
12178c2ecf20Sopenharmony_cistatic int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
12188c2ecf20Sopenharmony_ci		       struct sk_buff **to_free)
12198c2ecf20Sopenharmony_ci{
12208c2ecf20Sopenharmony_ci	unsigned int len = qdisc_pkt_len(skb), gso_segs;
12218c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
12228c2ecf20Sopenharmony_ci	struct qfq_class *cl;
12238c2ecf20Sopenharmony_ci	struct qfq_aggregate *agg;
12248c2ecf20Sopenharmony_ci	int err = 0;
12258c2ecf20Sopenharmony_ci	bool first;
12268c2ecf20Sopenharmony_ci
12278c2ecf20Sopenharmony_ci	cl = qfq_classify(skb, sch, &err);
12288c2ecf20Sopenharmony_ci	if (cl == NULL) {
12298c2ecf20Sopenharmony_ci		if (err & __NET_XMIT_BYPASS)
12308c2ecf20Sopenharmony_ci			qdisc_qstats_drop(sch);
12318c2ecf20Sopenharmony_ci		__qdisc_drop(skb, to_free);
12328c2ecf20Sopenharmony_ci		return err;
12338c2ecf20Sopenharmony_ci	}
12348c2ecf20Sopenharmony_ci	pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
12358c2ecf20Sopenharmony_ci
12368c2ecf20Sopenharmony_ci	if (unlikely(cl->agg->lmax < len)) {
12378c2ecf20Sopenharmony_ci		pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
12388c2ecf20Sopenharmony_ci			 cl->agg->lmax, len, cl->common.classid);
12398c2ecf20Sopenharmony_ci		err = qfq_change_agg(sch, cl, cl->agg->class_weight, len);
12408c2ecf20Sopenharmony_ci		if (err) {
12418c2ecf20Sopenharmony_ci			cl->qstats.drops++;
12428c2ecf20Sopenharmony_ci			return qdisc_drop(skb, sch, to_free);
12438c2ecf20Sopenharmony_ci		}
12448c2ecf20Sopenharmony_ci	}
12458c2ecf20Sopenharmony_ci
12468c2ecf20Sopenharmony_ci	gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
12478c2ecf20Sopenharmony_ci	first = !cl->qdisc->q.qlen;
12488c2ecf20Sopenharmony_ci	err = qdisc_enqueue(skb, cl->qdisc, to_free);
12498c2ecf20Sopenharmony_ci	if (unlikely(err != NET_XMIT_SUCCESS)) {
12508c2ecf20Sopenharmony_ci		pr_debug("qfq_enqueue: enqueue failed %d\n", err);
12518c2ecf20Sopenharmony_ci		if (net_xmit_drop_count(err)) {
12528c2ecf20Sopenharmony_ci			cl->qstats.drops++;
12538c2ecf20Sopenharmony_ci			qdisc_qstats_drop(sch);
12548c2ecf20Sopenharmony_ci		}
12558c2ecf20Sopenharmony_ci		return err;
12568c2ecf20Sopenharmony_ci	}
12578c2ecf20Sopenharmony_ci
12588c2ecf20Sopenharmony_ci	cl->bstats.bytes += len;
12598c2ecf20Sopenharmony_ci	cl->bstats.packets += gso_segs;
12608c2ecf20Sopenharmony_ci	sch->qstats.backlog += len;
12618c2ecf20Sopenharmony_ci	++sch->q.qlen;
12628c2ecf20Sopenharmony_ci
12638c2ecf20Sopenharmony_ci	agg = cl->agg;
12648c2ecf20Sopenharmony_ci	/* if the queue was not empty, then done here */
12658c2ecf20Sopenharmony_ci	if (!first) {
12668c2ecf20Sopenharmony_ci		if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) &&
12678c2ecf20Sopenharmony_ci		    list_first_entry(&agg->active, struct qfq_class, alist)
12688c2ecf20Sopenharmony_ci		    == cl && cl->deficit < len)
12698c2ecf20Sopenharmony_ci			list_move_tail(&cl->alist, &agg->active);
12708c2ecf20Sopenharmony_ci
12718c2ecf20Sopenharmony_ci		return err;
12728c2ecf20Sopenharmony_ci	}
12738c2ecf20Sopenharmony_ci
12748c2ecf20Sopenharmony_ci	/* schedule class for service within the aggregate */
12758c2ecf20Sopenharmony_ci	cl->deficit = agg->lmax;
12768c2ecf20Sopenharmony_ci	list_add_tail(&cl->alist, &agg->active);
12778c2ecf20Sopenharmony_ci
12788c2ecf20Sopenharmony_ci	if (list_first_entry(&agg->active, struct qfq_class, alist) != cl ||
12798c2ecf20Sopenharmony_ci	    q->in_serv_agg == agg)
12808c2ecf20Sopenharmony_ci		return err; /* non-empty or in service, nothing else to do */
12818c2ecf20Sopenharmony_ci
12828c2ecf20Sopenharmony_ci	qfq_activate_agg(q, agg, enqueue);
12838c2ecf20Sopenharmony_ci
12848c2ecf20Sopenharmony_ci	return err;
12858c2ecf20Sopenharmony_ci}
12868c2ecf20Sopenharmony_ci
12878c2ecf20Sopenharmony_ci/*
12888c2ecf20Sopenharmony_ci * Schedule aggregate according to its timestamps.
12898c2ecf20Sopenharmony_ci */
12908c2ecf20Sopenharmony_cistatic void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
12918c2ecf20Sopenharmony_ci{
12928c2ecf20Sopenharmony_ci	struct qfq_group *grp = agg->grp;
12938c2ecf20Sopenharmony_ci	u64 roundedS;
12948c2ecf20Sopenharmony_ci	int s;
12958c2ecf20Sopenharmony_ci
12968c2ecf20Sopenharmony_ci	roundedS = qfq_round_down(agg->S, grp->slot_shift);
12978c2ecf20Sopenharmony_ci
12988c2ecf20Sopenharmony_ci	/*
12998c2ecf20Sopenharmony_ci	 * Insert agg in the correct bucket.
13008c2ecf20Sopenharmony_ci	 * If agg->S >= grp->S we don't need to adjust the
13018c2ecf20Sopenharmony_ci	 * bucket list and simply go to the insertion phase.
13028c2ecf20Sopenharmony_ci	 * Otherwise grp->S is decreasing, we must make room
13038c2ecf20Sopenharmony_ci	 * in the bucket list, and also recompute the group state.
13048c2ecf20Sopenharmony_ci	 * Finally, if there were no flows in this group and nobody
13058c2ecf20Sopenharmony_ci	 * was in ER make sure to adjust V.
13068c2ecf20Sopenharmony_ci	 */
13078c2ecf20Sopenharmony_ci	if (grp->full_slots) {
13088c2ecf20Sopenharmony_ci		if (!qfq_gt(grp->S, agg->S))
13098c2ecf20Sopenharmony_ci			goto skip_update;
13108c2ecf20Sopenharmony_ci
13118c2ecf20Sopenharmony_ci		/* create a slot for this agg->S */
13128c2ecf20Sopenharmony_ci		qfq_slot_rotate(grp, roundedS);
13138c2ecf20Sopenharmony_ci		/* group was surely ineligible, remove */
13148c2ecf20Sopenharmony_ci		__clear_bit(grp->index, &q->bitmaps[IR]);
13158c2ecf20Sopenharmony_ci		__clear_bit(grp->index, &q->bitmaps[IB]);
13168c2ecf20Sopenharmony_ci	} else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V) &&
13178c2ecf20Sopenharmony_ci		   q->in_serv_agg == NULL)
13188c2ecf20Sopenharmony_ci		q->V = roundedS;
13198c2ecf20Sopenharmony_ci
13208c2ecf20Sopenharmony_ci	grp->S = roundedS;
13218c2ecf20Sopenharmony_ci	grp->F = roundedS + (2ULL << grp->slot_shift);
13228c2ecf20Sopenharmony_ci	s = qfq_calc_state(q, grp);
13238c2ecf20Sopenharmony_ci	__set_bit(grp->index, &q->bitmaps[s]);
13248c2ecf20Sopenharmony_ci
13258c2ecf20Sopenharmony_ci	pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
13268c2ecf20Sopenharmony_ci		 s, q->bitmaps[s],
13278c2ecf20Sopenharmony_ci		 (unsigned long long) agg->S,
13288c2ecf20Sopenharmony_ci		 (unsigned long long) agg->F,
13298c2ecf20Sopenharmony_ci		 (unsigned long long) q->V);
13308c2ecf20Sopenharmony_ci
13318c2ecf20Sopenharmony_ciskip_update:
13328c2ecf20Sopenharmony_ci	qfq_slot_insert(grp, agg, roundedS);
13338c2ecf20Sopenharmony_ci}
13348c2ecf20Sopenharmony_ci
13358c2ecf20Sopenharmony_ci
13368c2ecf20Sopenharmony_ci/* Update agg ts and schedule agg for service */
13378c2ecf20Sopenharmony_cistatic void qfq_activate_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
13388c2ecf20Sopenharmony_ci			     enum update_reason reason)
13398c2ecf20Sopenharmony_ci{
13408c2ecf20Sopenharmony_ci	agg->initial_budget = agg->budget = agg->budgetmax; /* recharge budg. */
13418c2ecf20Sopenharmony_ci
13428c2ecf20Sopenharmony_ci	qfq_update_agg_ts(q, agg, reason);
13438c2ecf20Sopenharmony_ci	if (q->in_serv_agg == NULL) { /* no aggr. in service or scheduled */
13448c2ecf20Sopenharmony_ci		q->in_serv_agg = agg; /* start serving this aggregate */
13458c2ecf20Sopenharmony_ci		 /* update V: to be in service, agg must be eligible */
13468c2ecf20Sopenharmony_ci		q->oldV = q->V = agg->S;
13478c2ecf20Sopenharmony_ci	} else if (agg != q->in_serv_agg)
13488c2ecf20Sopenharmony_ci		qfq_schedule_agg(q, agg);
13498c2ecf20Sopenharmony_ci}
13508c2ecf20Sopenharmony_ci
13518c2ecf20Sopenharmony_cistatic void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
13528c2ecf20Sopenharmony_ci			    struct qfq_aggregate *agg)
13538c2ecf20Sopenharmony_ci{
13548c2ecf20Sopenharmony_ci	unsigned int i, offset;
13558c2ecf20Sopenharmony_ci	u64 roundedS;
13568c2ecf20Sopenharmony_ci
13578c2ecf20Sopenharmony_ci	roundedS = qfq_round_down(agg->S, grp->slot_shift);
13588c2ecf20Sopenharmony_ci	offset = (roundedS - grp->S) >> grp->slot_shift;
13598c2ecf20Sopenharmony_ci
13608c2ecf20Sopenharmony_ci	i = (grp->front + offset) % QFQ_MAX_SLOTS;
13618c2ecf20Sopenharmony_ci
13628c2ecf20Sopenharmony_ci	hlist_del(&agg->next);
13638c2ecf20Sopenharmony_ci	if (hlist_empty(&grp->slots[i]))
13648c2ecf20Sopenharmony_ci		__clear_bit(offset, &grp->full_slots);
13658c2ecf20Sopenharmony_ci}
13668c2ecf20Sopenharmony_ci
13678c2ecf20Sopenharmony_ci/*
13688c2ecf20Sopenharmony_ci * Called to forcibly deschedule an aggregate.  If the aggregate is
13698c2ecf20Sopenharmony_ci * not in the front bucket, or if the latter has other aggregates in
13708c2ecf20Sopenharmony_ci * the front bucket, we can simply remove the aggregate with no other
13718c2ecf20Sopenharmony_ci * side effects.
13728c2ecf20Sopenharmony_ci * Otherwise we must propagate the event up.
13738c2ecf20Sopenharmony_ci */
13748c2ecf20Sopenharmony_cistatic void qfq_deactivate_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
13758c2ecf20Sopenharmony_ci{
13768c2ecf20Sopenharmony_ci	struct qfq_group *grp = agg->grp;
13778c2ecf20Sopenharmony_ci	unsigned long mask;
13788c2ecf20Sopenharmony_ci	u64 roundedS;
13798c2ecf20Sopenharmony_ci	int s;
13808c2ecf20Sopenharmony_ci
13818c2ecf20Sopenharmony_ci	if (agg == q->in_serv_agg) {
13828c2ecf20Sopenharmony_ci		charge_actual_service(agg);
13838c2ecf20Sopenharmony_ci		q->in_serv_agg = qfq_choose_next_agg(q);
13848c2ecf20Sopenharmony_ci		return;
13858c2ecf20Sopenharmony_ci	}
13868c2ecf20Sopenharmony_ci
13878c2ecf20Sopenharmony_ci	agg->F = agg->S;
13888c2ecf20Sopenharmony_ci	qfq_slot_remove(q, grp, agg);
13898c2ecf20Sopenharmony_ci
13908c2ecf20Sopenharmony_ci	if (!grp->full_slots) {
13918c2ecf20Sopenharmony_ci		__clear_bit(grp->index, &q->bitmaps[IR]);
13928c2ecf20Sopenharmony_ci		__clear_bit(grp->index, &q->bitmaps[EB]);
13938c2ecf20Sopenharmony_ci		__clear_bit(grp->index, &q->bitmaps[IB]);
13948c2ecf20Sopenharmony_ci
13958c2ecf20Sopenharmony_ci		if (test_bit(grp->index, &q->bitmaps[ER]) &&
13968c2ecf20Sopenharmony_ci		    !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
13978c2ecf20Sopenharmony_ci			mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
13988c2ecf20Sopenharmony_ci			if (mask)
13998c2ecf20Sopenharmony_ci				mask = ~((1UL << __fls(mask)) - 1);
14008c2ecf20Sopenharmony_ci			else
14018c2ecf20Sopenharmony_ci				mask = ~0UL;
14028c2ecf20Sopenharmony_ci			qfq_move_groups(q, mask, EB, ER);
14038c2ecf20Sopenharmony_ci			qfq_move_groups(q, mask, IB, IR);
14048c2ecf20Sopenharmony_ci		}
14058c2ecf20Sopenharmony_ci		__clear_bit(grp->index, &q->bitmaps[ER]);
14068c2ecf20Sopenharmony_ci	} else if (hlist_empty(&grp->slots[grp->front])) {
14078c2ecf20Sopenharmony_ci		agg = qfq_slot_scan(grp);
14088c2ecf20Sopenharmony_ci		roundedS = qfq_round_down(agg->S, grp->slot_shift);
14098c2ecf20Sopenharmony_ci		if (grp->S != roundedS) {
14108c2ecf20Sopenharmony_ci			__clear_bit(grp->index, &q->bitmaps[ER]);
14118c2ecf20Sopenharmony_ci			__clear_bit(grp->index, &q->bitmaps[IR]);
14128c2ecf20Sopenharmony_ci			__clear_bit(grp->index, &q->bitmaps[EB]);
14138c2ecf20Sopenharmony_ci			__clear_bit(grp->index, &q->bitmaps[IB]);
14148c2ecf20Sopenharmony_ci			grp->S = roundedS;
14158c2ecf20Sopenharmony_ci			grp->F = roundedS + (2ULL << grp->slot_shift);
14168c2ecf20Sopenharmony_ci			s = qfq_calc_state(q, grp);
14178c2ecf20Sopenharmony_ci			__set_bit(grp->index, &q->bitmaps[s]);
14188c2ecf20Sopenharmony_ci		}
14198c2ecf20Sopenharmony_ci	}
14208c2ecf20Sopenharmony_ci}
14218c2ecf20Sopenharmony_ci
14228c2ecf20Sopenharmony_cistatic void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
14238c2ecf20Sopenharmony_ci{
14248c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
14258c2ecf20Sopenharmony_ci	struct qfq_class *cl = (struct qfq_class *)arg;
14268c2ecf20Sopenharmony_ci
14278c2ecf20Sopenharmony_ci	qfq_deactivate_class(q, cl);
14288c2ecf20Sopenharmony_ci}
14298c2ecf20Sopenharmony_ci
14308c2ecf20Sopenharmony_cistatic int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
14318c2ecf20Sopenharmony_ci			  struct netlink_ext_ack *extack)
14328c2ecf20Sopenharmony_ci{
14338c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
14348c2ecf20Sopenharmony_ci	struct qfq_group *grp;
14358c2ecf20Sopenharmony_ci	int i, j, err;
14368c2ecf20Sopenharmony_ci	u32 max_cl_shift, maxbudg_shift, max_classes;
14378c2ecf20Sopenharmony_ci
14388c2ecf20Sopenharmony_ci	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
14398c2ecf20Sopenharmony_ci	if (err)
14408c2ecf20Sopenharmony_ci		return err;
14418c2ecf20Sopenharmony_ci
14428c2ecf20Sopenharmony_ci	err = qdisc_class_hash_init(&q->clhash);
14438c2ecf20Sopenharmony_ci	if (err < 0)
14448c2ecf20Sopenharmony_ci		return err;
14458c2ecf20Sopenharmony_ci
14468c2ecf20Sopenharmony_ci	max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1,
14478c2ecf20Sopenharmony_ci			    QFQ_MAX_AGG_CLASSES);
14488c2ecf20Sopenharmony_ci	/* max_cl_shift = floor(log_2(max_classes)) */
14498c2ecf20Sopenharmony_ci	max_cl_shift = __fls(max_classes);
14508c2ecf20Sopenharmony_ci	q->max_agg_classes = 1<<max_cl_shift;
14518c2ecf20Sopenharmony_ci
14528c2ecf20Sopenharmony_ci	/* maxbudg_shift = log2(max_len * max_classes_per_agg) */
14538c2ecf20Sopenharmony_ci	maxbudg_shift = QFQ_MTU_SHIFT + max_cl_shift;
14548c2ecf20Sopenharmony_ci	q->min_slot_shift = FRAC_BITS + maxbudg_shift - QFQ_MAX_INDEX;
14558c2ecf20Sopenharmony_ci
14568c2ecf20Sopenharmony_ci	for (i = 0; i <= QFQ_MAX_INDEX; i++) {
14578c2ecf20Sopenharmony_ci		grp = &q->groups[i];
14588c2ecf20Sopenharmony_ci		grp->index = i;
14598c2ecf20Sopenharmony_ci		grp->slot_shift = q->min_slot_shift + i;
14608c2ecf20Sopenharmony_ci		for (j = 0; j < QFQ_MAX_SLOTS; j++)
14618c2ecf20Sopenharmony_ci			INIT_HLIST_HEAD(&grp->slots[j]);
14628c2ecf20Sopenharmony_ci	}
14638c2ecf20Sopenharmony_ci
14648c2ecf20Sopenharmony_ci	INIT_HLIST_HEAD(&q->nonfull_aggs);
14658c2ecf20Sopenharmony_ci
14668c2ecf20Sopenharmony_ci	return 0;
14678c2ecf20Sopenharmony_ci}
14688c2ecf20Sopenharmony_ci
14698c2ecf20Sopenharmony_cistatic void qfq_reset_qdisc(struct Qdisc *sch)
14708c2ecf20Sopenharmony_ci{
14718c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
14728c2ecf20Sopenharmony_ci	struct qfq_class *cl;
14738c2ecf20Sopenharmony_ci	unsigned int i;
14748c2ecf20Sopenharmony_ci
14758c2ecf20Sopenharmony_ci	for (i = 0; i < q->clhash.hashsize; i++) {
14768c2ecf20Sopenharmony_ci		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
14778c2ecf20Sopenharmony_ci			if (cl->qdisc->q.qlen > 0)
14788c2ecf20Sopenharmony_ci				qfq_deactivate_class(q, cl);
14798c2ecf20Sopenharmony_ci
14808c2ecf20Sopenharmony_ci			qdisc_reset(cl->qdisc);
14818c2ecf20Sopenharmony_ci		}
14828c2ecf20Sopenharmony_ci	}
14838c2ecf20Sopenharmony_ci}
14848c2ecf20Sopenharmony_ci
14858c2ecf20Sopenharmony_cistatic void qfq_destroy_qdisc(struct Qdisc *sch)
14868c2ecf20Sopenharmony_ci{
14878c2ecf20Sopenharmony_ci	struct qfq_sched *q = qdisc_priv(sch);
14888c2ecf20Sopenharmony_ci	struct qfq_class *cl;
14898c2ecf20Sopenharmony_ci	struct hlist_node *next;
14908c2ecf20Sopenharmony_ci	unsigned int i;
14918c2ecf20Sopenharmony_ci
14928c2ecf20Sopenharmony_ci	tcf_block_put(q->block);
14938c2ecf20Sopenharmony_ci
14948c2ecf20Sopenharmony_ci	for (i = 0; i < q->clhash.hashsize; i++) {
14958c2ecf20Sopenharmony_ci		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
14968c2ecf20Sopenharmony_ci					  common.hnode) {
14978c2ecf20Sopenharmony_ci			qfq_destroy_class(sch, cl);
14988c2ecf20Sopenharmony_ci		}
14998c2ecf20Sopenharmony_ci	}
15008c2ecf20Sopenharmony_ci	qdisc_class_hash_destroy(&q->clhash);
15018c2ecf20Sopenharmony_ci}
15028c2ecf20Sopenharmony_ci
15038c2ecf20Sopenharmony_cistatic const struct Qdisc_class_ops qfq_class_ops = {
15048c2ecf20Sopenharmony_ci	.change		= qfq_change_class,
15058c2ecf20Sopenharmony_ci	.delete		= qfq_delete_class,
15068c2ecf20Sopenharmony_ci	.find		= qfq_search_class,
15078c2ecf20Sopenharmony_ci	.tcf_block	= qfq_tcf_block,
15088c2ecf20Sopenharmony_ci	.bind_tcf	= qfq_bind_tcf,
15098c2ecf20Sopenharmony_ci	.unbind_tcf	= qfq_unbind_tcf,
15108c2ecf20Sopenharmony_ci	.graft		= qfq_graft_class,
15118c2ecf20Sopenharmony_ci	.leaf		= qfq_class_leaf,
15128c2ecf20Sopenharmony_ci	.qlen_notify	= qfq_qlen_notify,
15138c2ecf20Sopenharmony_ci	.dump		= qfq_dump_class,
15148c2ecf20Sopenharmony_ci	.dump_stats	= qfq_dump_class_stats,
15158c2ecf20Sopenharmony_ci	.walk		= qfq_walk,
15168c2ecf20Sopenharmony_ci};
15178c2ecf20Sopenharmony_ci
15188c2ecf20Sopenharmony_cistatic struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
15198c2ecf20Sopenharmony_ci	.cl_ops		= &qfq_class_ops,
15208c2ecf20Sopenharmony_ci	.id		= "qfq",
15218c2ecf20Sopenharmony_ci	.priv_size	= sizeof(struct qfq_sched),
15228c2ecf20Sopenharmony_ci	.enqueue	= qfq_enqueue,
15238c2ecf20Sopenharmony_ci	.dequeue	= qfq_dequeue,
15248c2ecf20Sopenharmony_ci	.peek		= qdisc_peek_dequeued,
15258c2ecf20Sopenharmony_ci	.init		= qfq_init_qdisc,
15268c2ecf20Sopenharmony_ci	.reset		= qfq_reset_qdisc,
15278c2ecf20Sopenharmony_ci	.destroy	= qfq_destroy_qdisc,
15288c2ecf20Sopenharmony_ci	.owner		= THIS_MODULE,
15298c2ecf20Sopenharmony_ci};
15308c2ecf20Sopenharmony_ci
15318c2ecf20Sopenharmony_cistatic int __init qfq_init(void)
15328c2ecf20Sopenharmony_ci{
15338c2ecf20Sopenharmony_ci	return register_qdisc(&qfq_qdisc_ops);
15348c2ecf20Sopenharmony_ci}
15358c2ecf20Sopenharmony_ci
15368c2ecf20Sopenharmony_cistatic void __exit qfq_exit(void)
15378c2ecf20Sopenharmony_ci{
15388c2ecf20Sopenharmony_ci	unregister_qdisc(&qfq_qdisc_ops);
15398c2ecf20Sopenharmony_ci}
15408c2ecf20Sopenharmony_ci
15418c2ecf20Sopenharmony_cimodule_init(qfq_init);
15428c2ecf20Sopenharmony_cimodule_exit(qfq_exit);
15438c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
1544