162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci *  Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com>
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci *  Meant to be mostly used for locally generated traffic :
862306a36Sopenharmony_ci *  Fast classification depends on skb->sk being set before reaching us.
962306a36Sopenharmony_ci *  If not, (router workload), we use rxhash as fallback, with 32 bits wide hash.
1062306a36Sopenharmony_ci *  All packets belonging to a socket are considered as a 'flow'.
1162306a36Sopenharmony_ci *
1262306a36Sopenharmony_ci *  Flows are dynamically allocated and stored in a hash table of RB trees
1362306a36Sopenharmony_ci *  They are also part of one Round Robin 'queues' (new or old flows)
1462306a36Sopenharmony_ci *
1562306a36Sopenharmony_ci *  Burst avoidance (aka pacing) capability :
1662306a36Sopenharmony_ci *
1762306a36Sopenharmony_ci *  Transport (eg TCP) can set in sk->sk_pacing_rate a rate, enqueue a
1862306a36Sopenharmony_ci *  bunch of packets, and this packet scheduler adds delay between
1962306a36Sopenharmony_ci *  packets to respect rate limitation.
2062306a36Sopenharmony_ci *
2162306a36Sopenharmony_ci *  enqueue() :
2262306a36Sopenharmony_ci *   - lookup one RB tree (out of 1024 or more) to find the flow.
2362306a36Sopenharmony_ci *     If non existent flow, create it, add it to the tree.
2462306a36Sopenharmony_ci *     Add skb to the per flow list of skb (fifo).
2562306a36Sopenharmony_ci *   - Use a special fifo for high prio packets
2662306a36Sopenharmony_ci *
2762306a36Sopenharmony_ci *  dequeue() : serves flows in Round Robin
2862306a36Sopenharmony_ci *  Note : When a flow becomes empty, we do not immediately remove it from
2962306a36Sopenharmony_ci *  rb trees, for performance reasons (its expected to send additional packets,
3062306a36Sopenharmony_ci *  or SLAB cache will reuse socket for another flow)
3162306a36Sopenharmony_ci */
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci#include <linux/module.h>
3462306a36Sopenharmony_ci#include <linux/types.h>
3562306a36Sopenharmony_ci#include <linux/kernel.h>
3662306a36Sopenharmony_ci#include <linux/jiffies.h>
3762306a36Sopenharmony_ci#include <linux/string.h>
3862306a36Sopenharmony_ci#include <linux/in.h>
3962306a36Sopenharmony_ci#include <linux/errno.h>
4062306a36Sopenharmony_ci#include <linux/init.h>
4162306a36Sopenharmony_ci#include <linux/skbuff.h>
4262306a36Sopenharmony_ci#include <linux/slab.h>
4362306a36Sopenharmony_ci#include <linux/rbtree.h>
4462306a36Sopenharmony_ci#include <linux/hash.h>
4562306a36Sopenharmony_ci#include <linux/prefetch.h>
4662306a36Sopenharmony_ci#include <linux/vmalloc.h>
4762306a36Sopenharmony_ci#include <net/netlink.h>
4862306a36Sopenharmony_ci#include <net/pkt_sched.h>
4962306a36Sopenharmony_ci#include <net/sock.h>
5062306a36Sopenharmony_ci#include <net/tcp_states.h>
5162306a36Sopenharmony_ci#include <net/tcp.h>
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_cistruct fq_skb_cb {
5462306a36Sopenharmony_ci	u64	        time_to_send;
5562306a36Sopenharmony_ci};
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_cistatic inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb)
5862306a36Sopenharmony_ci{
5962306a36Sopenharmony_ci	qdisc_cb_private_validate(skb, sizeof(struct fq_skb_cb));
6062306a36Sopenharmony_ci	return (struct fq_skb_cb *)qdisc_skb_cb(skb)->data;
6162306a36Sopenharmony_ci}
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci/*
6462306a36Sopenharmony_ci * Per flow structure, dynamically allocated.
6562306a36Sopenharmony_ci * If packets have monotically increasing time_to_send, they are placed in O(1)
6662306a36Sopenharmony_ci * in linear list (head,tail), otherwise are placed in a rbtree (t_root).
6762306a36Sopenharmony_ci */
6862306a36Sopenharmony_cistruct fq_flow {
6962306a36Sopenharmony_ci/* First cache line : used in fq_gc(), fq_enqueue(), fq_dequeue() */
7062306a36Sopenharmony_ci	struct rb_root	t_root;
7162306a36Sopenharmony_ci	struct sk_buff	*head;		/* list of skbs for this flow : first skb */
7262306a36Sopenharmony_ci	union {
7362306a36Sopenharmony_ci		struct sk_buff *tail;	/* last skb in the list */
7462306a36Sopenharmony_ci		unsigned long  age;	/* (jiffies | 1UL) when flow was emptied, for gc */
7562306a36Sopenharmony_ci	};
7662306a36Sopenharmony_ci	struct rb_node	fq_node;	/* anchor in fq_root[] trees */
7762306a36Sopenharmony_ci	struct sock	*sk;
7862306a36Sopenharmony_ci	u32		socket_hash;	/* sk_hash */
7962306a36Sopenharmony_ci	int		qlen;		/* number of packets in flow queue */
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci/* Second cache line, used in fq_dequeue() */
8262306a36Sopenharmony_ci	int		credit;
8362306a36Sopenharmony_ci	/* 32bit hole on 64bit arches */
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	struct fq_flow *next;		/* next pointer in RR lists */
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci	struct rb_node  rate_node;	/* anchor in q->delayed tree */
8862306a36Sopenharmony_ci	u64		time_next_packet;
8962306a36Sopenharmony_ci} ____cacheline_aligned_in_smp;
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_cistruct fq_flow_head {
9262306a36Sopenharmony_ci	struct fq_flow *first;
9362306a36Sopenharmony_ci	struct fq_flow *last;
9462306a36Sopenharmony_ci};
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_cistruct fq_sched_data {
9762306a36Sopenharmony_ci	struct fq_flow_head new_flows;
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	struct fq_flow_head old_flows;
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	struct rb_root	delayed;	/* for rate limited flows */
10262306a36Sopenharmony_ci	u64		time_next_delayed_flow;
10362306a36Sopenharmony_ci	u64		ktime_cache;	/* copy of last ktime_get_ns() */
10462306a36Sopenharmony_ci	unsigned long	unthrottle_latency_ns;
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci	struct fq_flow	internal;	/* for non classified or high prio packets */
10762306a36Sopenharmony_ci	u32		quantum;
10862306a36Sopenharmony_ci	u32		initial_quantum;
10962306a36Sopenharmony_ci	u32		flow_refill_delay;
11062306a36Sopenharmony_ci	u32		flow_plimit;	/* max packets per flow */
11162306a36Sopenharmony_ci	unsigned long	flow_max_rate;	/* optional max rate per flow */
11262306a36Sopenharmony_ci	u64		ce_threshold;
11362306a36Sopenharmony_ci	u64		horizon;	/* horizon in ns */
11462306a36Sopenharmony_ci	u32		orphan_mask;	/* mask for orphaned skb */
11562306a36Sopenharmony_ci	u32		low_rate_threshold;
11662306a36Sopenharmony_ci	struct rb_root	*fq_root;
11762306a36Sopenharmony_ci	u8		rate_enable;
11862306a36Sopenharmony_ci	u8		fq_trees_log;
11962306a36Sopenharmony_ci	u8		horizon_drop;
12062306a36Sopenharmony_ci	u32		flows;
12162306a36Sopenharmony_ci	u32		inactive_flows;
12262306a36Sopenharmony_ci	u32		throttled_flows;
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	u64		stat_gc_flows;
12562306a36Sopenharmony_ci	u64		stat_internal_packets;
12662306a36Sopenharmony_ci	u64		stat_throttled;
12762306a36Sopenharmony_ci	u64		stat_ce_mark;
12862306a36Sopenharmony_ci	u64		stat_horizon_drops;
12962306a36Sopenharmony_ci	u64		stat_horizon_caps;
13062306a36Sopenharmony_ci	u64		stat_flows_plimit;
13162306a36Sopenharmony_ci	u64		stat_pkts_too_long;
13262306a36Sopenharmony_ci	u64		stat_allocation_errors;
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	u32		timer_slack; /* hrtimer slack in ns */
13562306a36Sopenharmony_ci	struct qdisc_watchdog watchdog;
13662306a36Sopenharmony_ci};
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci/*
13962306a36Sopenharmony_ci * f->tail and f->age share the same location.
14062306a36Sopenharmony_ci * We can use the low order bit to differentiate if this location points
14162306a36Sopenharmony_ci * to a sk_buff or contains a jiffies value, if we force this value to be odd.
14262306a36Sopenharmony_ci * This assumes f->tail low order bit must be 0 since alignof(struct sk_buff) >= 2
14362306a36Sopenharmony_ci */
14462306a36Sopenharmony_cistatic void fq_flow_set_detached(struct fq_flow *f)
14562306a36Sopenharmony_ci{
14662306a36Sopenharmony_ci	f->age = jiffies | 1UL;
14762306a36Sopenharmony_ci}
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_cistatic bool fq_flow_is_detached(const struct fq_flow *f)
15062306a36Sopenharmony_ci{
15162306a36Sopenharmony_ci	return !!(f->age & 1UL);
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci/* special value to mark a throttled flow (not on old/new list) */
15562306a36Sopenharmony_cistatic struct fq_flow throttled;
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_cistatic bool fq_flow_is_throttled(const struct fq_flow *f)
15862306a36Sopenharmony_ci{
15962306a36Sopenharmony_ci	return f->next == &throttled;
16062306a36Sopenharmony_ci}
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_cistatic void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
16362306a36Sopenharmony_ci{
16462306a36Sopenharmony_ci	if (head->first)
16562306a36Sopenharmony_ci		head->last->next = flow;
16662306a36Sopenharmony_ci	else
16762306a36Sopenharmony_ci		head->first = flow;
16862306a36Sopenharmony_ci	head->last = flow;
16962306a36Sopenharmony_ci	flow->next = NULL;
17062306a36Sopenharmony_ci}
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_cistatic void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f)
17362306a36Sopenharmony_ci{
17462306a36Sopenharmony_ci	rb_erase(&f->rate_node, &q->delayed);
17562306a36Sopenharmony_ci	q->throttled_flows--;
17662306a36Sopenharmony_ci	fq_flow_add_tail(&q->old_flows, f);
17762306a36Sopenharmony_ci}
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_cistatic void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
18062306a36Sopenharmony_ci{
18162306a36Sopenharmony_ci	struct rb_node **p = &q->delayed.rb_node, *parent = NULL;
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	while (*p) {
18462306a36Sopenharmony_ci		struct fq_flow *aux;
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci		parent = *p;
18762306a36Sopenharmony_ci		aux = rb_entry(parent, struct fq_flow, rate_node);
18862306a36Sopenharmony_ci		if (f->time_next_packet >= aux->time_next_packet)
18962306a36Sopenharmony_ci			p = &parent->rb_right;
19062306a36Sopenharmony_ci		else
19162306a36Sopenharmony_ci			p = &parent->rb_left;
19262306a36Sopenharmony_ci	}
19362306a36Sopenharmony_ci	rb_link_node(&f->rate_node, parent, p);
19462306a36Sopenharmony_ci	rb_insert_color(&f->rate_node, &q->delayed);
19562306a36Sopenharmony_ci	q->throttled_flows++;
19662306a36Sopenharmony_ci	q->stat_throttled++;
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	f->next = &throttled;
19962306a36Sopenharmony_ci	if (q->time_next_delayed_flow > f->time_next_packet)
20062306a36Sopenharmony_ci		q->time_next_delayed_flow = f->time_next_packet;
20162306a36Sopenharmony_ci}
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_cistatic struct kmem_cache *fq_flow_cachep __read_mostly;
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci/* limit number of collected flows per round */
20862306a36Sopenharmony_ci#define FQ_GC_MAX 8
20962306a36Sopenharmony_ci#define FQ_GC_AGE (3*HZ)
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_cistatic bool fq_gc_candidate(const struct fq_flow *f)
21262306a36Sopenharmony_ci{
21362306a36Sopenharmony_ci	return fq_flow_is_detached(f) &&
21462306a36Sopenharmony_ci	       time_after(jiffies, f->age + FQ_GC_AGE);
21562306a36Sopenharmony_ci}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_cistatic void fq_gc(struct fq_sched_data *q,
21862306a36Sopenharmony_ci		  struct rb_root *root,
21962306a36Sopenharmony_ci		  struct sock *sk)
22062306a36Sopenharmony_ci{
22162306a36Sopenharmony_ci	struct rb_node **p, *parent;
22262306a36Sopenharmony_ci	void *tofree[FQ_GC_MAX];
22362306a36Sopenharmony_ci	struct fq_flow *f;
22462306a36Sopenharmony_ci	int i, fcnt = 0;
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	p = &root->rb_node;
22762306a36Sopenharmony_ci	parent = NULL;
22862306a36Sopenharmony_ci	while (*p) {
22962306a36Sopenharmony_ci		parent = *p;
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci		f = rb_entry(parent, struct fq_flow, fq_node);
23262306a36Sopenharmony_ci		if (f->sk == sk)
23362306a36Sopenharmony_ci			break;
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci		if (fq_gc_candidate(f)) {
23662306a36Sopenharmony_ci			tofree[fcnt++] = f;
23762306a36Sopenharmony_ci			if (fcnt == FQ_GC_MAX)
23862306a36Sopenharmony_ci				break;
23962306a36Sopenharmony_ci		}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci		if (f->sk > sk)
24262306a36Sopenharmony_ci			p = &parent->rb_right;
24362306a36Sopenharmony_ci		else
24462306a36Sopenharmony_ci			p = &parent->rb_left;
24562306a36Sopenharmony_ci	}
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	if (!fcnt)
24862306a36Sopenharmony_ci		return;
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	for (i = fcnt; i > 0; ) {
25162306a36Sopenharmony_ci		f = tofree[--i];
25262306a36Sopenharmony_ci		rb_erase(&f->fq_node, root);
25362306a36Sopenharmony_ci	}
25462306a36Sopenharmony_ci	q->flows -= fcnt;
25562306a36Sopenharmony_ci	q->inactive_flows -= fcnt;
25662306a36Sopenharmony_ci	q->stat_gc_flows += fcnt;
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree);
25962306a36Sopenharmony_ci}
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_cistatic struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
26262306a36Sopenharmony_ci{
26362306a36Sopenharmony_ci	struct rb_node **p, *parent;
26462306a36Sopenharmony_ci	struct sock *sk = skb->sk;
26562306a36Sopenharmony_ci	struct rb_root *root;
26662306a36Sopenharmony_ci	struct fq_flow *f;
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	/* warning: no starvation prevention... */
26962306a36Sopenharmony_ci	if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
27062306a36Sopenharmony_ci		return &q->internal;
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	/* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket
27362306a36Sopenharmony_ci	 * or a listener (SYNCOOKIE mode)
27462306a36Sopenharmony_ci	 * 1) request sockets are not full blown,
27562306a36Sopenharmony_ci	 *    they do not contain sk_pacing_rate
27662306a36Sopenharmony_ci	 * 2) They are not part of a 'flow' yet
27762306a36Sopenharmony_ci	 * 3) We do not want to rate limit them (eg SYNFLOOD attack),
27862306a36Sopenharmony_ci	 *    especially if the listener set SO_MAX_PACING_RATE
27962306a36Sopenharmony_ci	 * 4) We pretend they are orphaned
28062306a36Sopenharmony_ci	 */
28162306a36Sopenharmony_ci	if (!sk || sk_listener(sk)) {
28262306a36Sopenharmony_ci		unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci		/* By forcing low order bit to 1, we make sure to not
28562306a36Sopenharmony_ci		 * collide with a local flow (socket pointers are word aligned)
28662306a36Sopenharmony_ci		 */
28762306a36Sopenharmony_ci		sk = (struct sock *)((hash << 1) | 1UL);
28862306a36Sopenharmony_ci		skb_orphan(skb);
28962306a36Sopenharmony_ci	} else if (sk->sk_state == TCP_CLOSE) {
29062306a36Sopenharmony_ci		unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
29162306a36Sopenharmony_ci		/*
29262306a36Sopenharmony_ci		 * Sockets in TCP_CLOSE are non connected.
29362306a36Sopenharmony_ci		 * Typical use case is UDP sockets, they can send packets
29462306a36Sopenharmony_ci		 * with sendto() to many different destinations.
29562306a36Sopenharmony_ci		 * We probably could use a generic bit advertising
29662306a36Sopenharmony_ci		 * non connected sockets, instead of sk_state == TCP_CLOSE,
29762306a36Sopenharmony_ci		 * if we care enough.
29862306a36Sopenharmony_ci		 */
29962306a36Sopenharmony_ci		sk = (struct sock *)((hash << 1) | 1UL);
30062306a36Sopenharmony_ci	}
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	if (q->flows >= (2U << q->fq_trees_log) &&
30562306a36Sopenharmony_ci	    q->inactive_flows > q->flows/2)
30662306a36Sopenharmony_ci		fq_gc(q, root, sk);
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	p = &root->rb_node;
30962306a36Sopenharmony_ci	parent = NULL;
31062306a36Sopenharmony_ci	while (*p) {
31162306a36Sopenharmony_ci		parent = *p;
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci		f = rb_entry(parent, struct fq_flow, fq_node);
31462306a36Sopenharmony_ci		if (f->sk == sk) {
31562306a36Sopenharmony_ci			/* socket might have been reallocated, so check
31662306a36Sopenharmony_ci			 * if its sk_hash is the same.
31762306a36Sopenharmony_ci			 * It not, we need to refill credit with
31862306a36Sopenharmony_ci			 * initial quantum
31962306a36Sopenharmony_ci			 */
32062306a36Sopenharmony_ci			if (unlikely(skb->sk == sk &&
32162306a36Sopenharmony_ci				     f->socket_hash != sk->sk_hash)) {
32262306a36Sopenharmony_ci				f->credit = q->initial_quantum;
32362306a36Sopenharmony_ci				f->socket_hash = sk->sk_hash;
32462306a36Sopenharmony_ci				if (q->rate_enable)
32562306a36Sopenharmony_ci					smp_store_release(&sk->sk_pacing_status,
32662306a36Sopenharmony_ci							  SK_PACING_FQ);
32762306a36Sopenharmony_ci				if (fq_flow_is_throttled(f))
32862306a36Sopenharmony_ci					fq_flow_unset_throttled(q, f);
32962306a36Sopenharmony_ci				f->time_next_packet = 0ULL;
33062306a36Sopenharmony_ci			}
33162306a36Sopenharmony_ci			return f;
33262306a36Sopenharmony_ci		}
33362306a36Sopenharmony_ci		if (f->sk > sk)
33462306a36Sopenharmony_ci			p = &parent->rb_right;
33562306a36Sopenharmony_ci		else
33662306a36Sopenharmony_ci			p = &parent->rb_left;
33762306a36Sopenharmony_ci	}
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci	f = kmem_cache_zalloc(fq_flow_cachep, GFP_ATOMIC | __GFP_NOWARN);
34062306a36Sopenharmony_ci	if (unlikely(!f)) {
34162306a36Sopenharmony_ci		q->stat_allocation_errors++;
34262306a36Sopenharmony_ci		return &q->internal;
34362306a36Sopenharmony_ci	}
34462306a36Sopenharmony_ci	/* f->t_root is already zeroed after kmem_cache_zalloc() */
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	fq_flow_set_detached(f);
34762306a36Sopenharmony_ci	f->sk = sk;
34862306a36Sopenharmony_ci	if (skb->sk == sk) {
34962306a36Sopenharmony_ci		f->socket_hash = sk->sk_hash;
35062306a36Sopenharmony_ci		if (q->rate_enable)
35162306a36Sopenharmony_ci			smp_store_release(&sk->sk_pacing_status,
35262306a36Sopenharmony_ci					  SK_PACING_FQ);
35362306a36Sopenharmony_ci	}
35462306a36Sopenharmony_ci	f->credit = q->initial_quantum;
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci	rb_link_node(&f->fq_node, parent, p);
35762306a36Sopenharmony_ci	rb_insert_color(&f->fq_node, root);
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci	q->flows++;
36062306a36Sopenharmony_ci	q->inactive_flows++;
36162306a36Sopenharmony_ci	return f;
36262306a36Sopenharmony_ci}
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_cistatic struct sk_buff *fq_peek(struct fq_flow *flow)
36562306a36Sopenharmony_ci{
36662306a36Sopenharmony_ci	struct sk_buff *skb = skb_rb_first(&flow->t_root);
36762306a36Sopenharmony_ci	struct sk_buff *head = flow->head;
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_ci	if (!skb)
37062306a36Sopenharmony_ci		return head;
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	if (!head)
37362306a36Sopenharmony_ci		return skb;
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_ci	if (fq_skb_cb(skb)->time_to_send < fq_skb_cb(head)->time_to_send)
37662306a36Sopenharmony_ci		return skb;
37762306a36Sopenharmony_ci	return head;
37862306a36Sopenharmony_ci}
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_cistatic void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow,
38162306a36Sopenharmony_ci			  struct sk_buff *skb)
38262306a36Sopenharmony_ci{
38362306a36Sopenharmony_ci	if (skb == flow->head) {
38462306a36Sopenharmony_ci		flow->head = skb->next;
38562306a36Sopenharmony_ci	} else {
38662306a36Sopenharmony_ci		rb_erase(&skb->rbnode, &flow->t_root);
38762306a36Sopenharmony_ci		skb->dev = qdisc_dev(sch);
38862306a36Sopenharmony_ci	}
38962306a36Sopenharmony_ci}
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci/* Remove one skb from flow queue.
39262306a36Sopenharmony_ci * This skb must be the return value of prior fq_peek().
39362306a36Sopenharmony_ci */
39462306a36Sopenharmony_cistatic void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow,
39562306a36Sopenharmony_ci			   struct sk_buff *skb)
39662306a36Sopenharmony_ci{
39762306a36Sopenharmony_ci	fq_erase_head(sch, flow, skb);
39862306a36Sopenharmony_ci	skb_mark_not_on_list(skb);
39962306a36Sopenharmony_ci	flow->qlen--;
40062306a36Sopenharmony_ci	qdisc_qstats_backlog_dec(sch, skb);
40162306a36Sopenharmony_ci	sch->q.qlen--;
40262306a36Sopenharmony_ci}
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_cistatic void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
40562306a36Sopenharmony_ci{
40662306a36Sopenharmony_ci	struct rb_node **p, *parent;
40762306a36Sopenharmony_ci	struct sk_buff *head, *aux;
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci	head = flow->head;
41062306a36Sopenharmony_ci	if (!head ||
41162306a36Sopenharmony_ci	    fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) {
41262306a36Sopenharmony_ci		if (!head)
41362306a36Sopenharmony_ci			flow->head = skb;
41462306a36Sopenharmony_ci		else
41562306a36Sopenharmony_ci			flow->tail->next = skb;
41662306a36Sopenharmony_ci		flow->tail = skb;
41762306a36Sopenharmony_ci		skb->next = NULL;
41862306a36Sopenharmony_ci		return;
41962306a36Sopenharmony_ci	}
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci	p = &flow->t_root.rb_node;
42262306a36Sopenharmony_ci	parent = NULL;
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_ci	while (*p) {
42562306a36Sopenharmony_ci		parent = *p;
42662306a36Sopenharmony_ci		aux = rb_to_skb(parent);
42762306a36Sopenharmony_ci		if (fq_skb_cb(skb)->time_to_send >= fq_skb_cb(aux)->time_to_send)
42862306a36Sopenharmony_ci			p = &parent->rb_right;
42962306a36Sopenharmony_ci		else
43062306a36Sopenharmony_ci			p = &parent->rb_left;
43162306a36Sopenharmony_ci	}
43262306a36Sopenharmony_ci	rb_link_node(&skb->rbnode, parent, p);
43362306a36Sopenharmony_ci	rb_insert_color(&skb->rbnode, &flow->t_root);
43462306a36Sopenharmony_ci}
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_cistatic bool fq_packet_beyond_horizon(const struct sk_buff *skb,
43762306a36Sopenharmony_ci				    const struct fq_sched_data *q)
43862306a36Sopenharmony_ci{
43962306a36Sopenharmony_ci	return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
44062306a36Sopenharmony_ci}
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_cistatic int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
44362306a36Sopenharmony_ci		      struct sk_buff **to_free)
44462306a36Sopenharmony_ci{
44562306a36Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
44662306a36Sopenharmony_ci	struct fq_flow *f;
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci	if (unlikely(sch->q.qlen >= sch->limit))
44962306a36Sopenharmony_ci		return qdisc_drop(skb, sch, to_free);
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci	if (!skb->tstamp) {
45262306a36Sopenharmony_ci		fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
45362306a36Sopenharmony_ci	} else {
45462306a36Sopenharmony_ci		/* Check if packet timestamp is too far in the future.
45562306a36Sopenharmony_ci		 * Try first if our cached value, to avoid ktime_get_ns()
45662306a36Sopenharmony_ci		 * cost in most cases.
45762306a36Sopenharmony_ci		 */
45862306a36Sopenharmony_ci		if (fq_packet_beyond_horizon(skb, q)) {
45962306a36Sopenharmony_ci			/* Refresh our cache and check another time */
46062306a36Sopenharmony_ci			q->ktime_cache = ktime_get_ns();
46162306a36Sopenharmony_ci			if (fq_packet_beyond_horizon(skb, q)) {
46262306a36Sopenharmony_ci				if (q->horizon_drop) {
46362306a36Sopenharmony_ci					q->stat_horizon_drops++;
46462306a36Sopenharmony_ci					return qdisc_drop(skb, sch, to_free);
46562306a36Sopenharmony_ci				}
46662306a36Sopenharmony_ci				q->stat_horizon_caps++;
46762306a36Sopenharmony_ci				skb->tstamp = q->ktime_cache + q->horizon;
46862306a36Sopenharmony_ci			}
46962306a36Sopenharmony_ci		}
47062306a36Sopenharmony_ci		fq_skb_cb(skb)->time_to_send = skb->tstamp;
47162306a36Sopenharmony_ci	}
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci	f = fq_classify(skb, q);
47462306a36Sopenharmony_ci	if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
47562306a36Sopenharmony_ci		q->stat_flows_plimit++;
47662306a36Sopenharmony_ci		return qdisc_drop(skb, sch, to_free);
47762306a36Sopenharmony_ci	}
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci	f->qlen++;
48062306a36Sopenharmony_ci	qdisc_qstats_backlog_inc(sch, skb);
48162306a36Sopenharmony_ci	if (fq_flow_is_detached(f)) {
48262306a36Sopenharmony_ci		fq_flow_add_tail(&q->new_flows, f);
48362306a36Sopenharmony_ci		if (time_after(jiffies, f->age + q->flow_refill_delay))
48462306a36Sopenharmony_ci			f->credit = max_t(u32, f->credit, q->quantum);
48562306a36Sopenharmony_ci		q->inactive_flows--;
48662306a36Sopenharmony_ci	}
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	/* Note: this overwrites f->age */
48962306a36Sopenharmony_ci	flow_queue_add(f, skb);
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci	if (unlikely(f == &q->internal)) {
49262306a36Sopenharmony_ci		q->stat_internal_packets++;
49362306a36Sopenharmony_ci	}
49462306a36Sopenharmony_ci	sch->q.qlen++;
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci	return NET_XMIT_SUCCESS;
49762306a36Sopenharmony_ci}
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_cistatic void fq_check_throttled(struct fq_sched_data *q, u64 now)
50062306a36Sopenharmony_ci{
50162306a36Sopenharmony_ci	unsigned long sample;
50262306a36Sopenharmony_ci	struct rb_node *p;
50362306a36Sopenharmony_ci
50462306a36Sopenharmony_ci	if (q->time_next_delayed_flow > now)
50562306a36Sopenharmony_ci		return;
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	/* Update unthrottle latency EWMA.
50862306a36Sopenharmony_ci	 * This is cheap and can help diagnosing timer/latency problems.
50962306a36Sopenharmony_ci	 */
51062306a36Sopenharmony_ci	sample = (unsigned long)(now - q->time_next_delayed_flow);
51162306a36Sopenharmony_ci	q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
51262306a36Sopenharmony_ci	q->unthrottle_latency_ns += sample >> 3;
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	q->time_next_delayed_flow = ~0ULL;
51562306a36Sopenharmony_ci	while ((p = rb_first(&q->delayed)) != NULL) {
51662306a36Sopenharmony_ci		struct fq_flow *f = rb_entry(p, struct fq_flow, rate_node);
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci		if (f->time_next_packet > now) {
51962306a36Sopenharmony_ci			q->time_next_delayed_flow = f->time_next_packet;
52062306a36Sopenharmony_ci			break;
52162306a36Sopenharmony_ci		}
52262306a36Sopenharmony_ci		fq_flow_unset_throttled(q, f);
52362306a36Sopenharmony_ci	}
52462306a36Sopenharmony_ci}
52562306a36Sopenharmony_ci
52662306a36Sopenharmony_cistatic struct sk_buff *fq_dequeue(struct Qdisc *sch)
52762306a36Sopenharmony_ci{
52862306a36Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
52962306a36Sopenharmony_ci	struct fq_flow_head *head;
53062306a36Sopenharmony_ci	struct sk_buff *skb;
53162306a36Sopenharmony_ci	struct fq_flow *f;
53262306a36Sopenharmony_ci	unsigned long rate;
53362306a36Sopenharmony_ci	u32 plen;
53462306a36Sopenharmony_ci	u64 now;
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci	if (!sch->q.qlen)
53762306a36Sopenharmony_ci		return NULL;
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci	skb = fq_peek(&q->internal);
54062306a36Sopenharmony_ci	if (unlikely(skb)) {
54162306a36Sopenharmony_ci		fq_dequeue_skb(sch, &q->internal, skb);
54262306a36Sopenharmony_ci		goto out;
54362306a36Sopenharmony_ci	}
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci	q->ktime_cache = now = ktime_get_ns();
54662306a36Sopenharmony_ci	fq_check_throttled(q, now);
54762306a36Sopenharmony_cibegin:
54862306a36Sopenharmony_ci	head = &q->new_flows;
54962306a36Sopenharmony_ci	if (!head->first) {
55062306a36Sopenharmony_ci		head = &q->old_flows;
55162306a36Sopenharmony_ci		if (!head->first) {
55262306a36Sopenharmony_ci			if (q->time_next_delayed_flow != ~0ULL)
55362306a36Sopenharmony_ci				qdisc_watchdog_schedule_range_ns(&q->watchdog,
55462306a36Sopenharmony_ci							q->time_next_delayed_flow,
55562306a36Sopenharmony_ci							q->timer_slack);
55662306a36Sopenharmony_ci			return NULL;
55762306a36Sopenharmony_ci		}
55862306a36Sopenharmony_ci	}
55962306a36Sopenharmony_ci	f = head->first;
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	if (f->credit <= 0) {
56262306a36Sopenharmony_ci		f->credit += q->quantum;
56362306a36Sopenharmony_ci		head->first = f->next;
56462306a36Sopenharmony_ci		fq_flow_add_tail(&q->old_flows, f);
56562306a36Sopenharmony_ci		goto begin;
56662306a36Sopenharmony_ci	}
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_ci	skb = fq_peek(f);
56962306a36Sopenharmony_ci	if (skb) {
57062306a36Sopenharmony_ci		u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send,
57162306a36Sopenharmony_ci					     f->time_next_packet);
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_ci		if (now < time_next_packet) {
57462306a36Sopenharmony_ci			head->first = f->next;
57562306a36Sopenharmony_ci			f->time_next_packet = time_next_packet;
57662306a36Sopenharmony_ci			fq_flow_set_throttled(q, f);
57762306a36Sopenharmony_ci			goto begin;
57862306a36Sopenharmony_ci		}
57962306a36Sopenharmony_ci		prefetch(&skb->end);
58062306a36Sopenharmony_ci		if ((s64)(now - time_next_packet - q->ce_threshold) > 0) {
58162306a36Sopenharmony_ci			INET_ECN_set_ce(skb);
58262306a36Sopenharmony_ci			q->stat_ce_mark++;
58362306a36Sopenharmony_ci		}
58462306a36Sopenharmony_ci		fq_dequeue_skb(sch, f, skb);
58562306a36Sopenharmony_ci	} else {
58662306a36Sopenharmony_ci		head->first = f->next;
58762306a36Sopenharmony_ci		/* force a pass through old_flows to prevent starvation */
58862306a36Sopenharmony_ci		if ((head == &q->new_flows) && q->old_flows.first) {
58962306a36Sopenharmony_ci			fq_flow_add_tail(&q->old_flows, f);
59062306a36Sopenharmony_ci		} else {
59162306a36Sopenharmony_ci			fq_flow_set_detached(f);
59262306a36Sopenharmony_ci			q->inactive_flows++;
59362306a36Sopenharmony_ci		}
59462306a36Sopenharmony_ci		goto begin;
59562306a36Sopenharmony_ci	}
59662306a36Sopenharmony_ci	plen = qdisc_pkt_len(skb);
59762306a36Sopenharmony_ci	f->credit -= plen;
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci	if (!q->rate_enable)
60062306a36Sopenharmony_ci		goto out;
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci	rate = q->flow_max_rate;
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci	/* If EDT time was provided for this skb, we need to
60562306a36Sopenharmony_ci	 * update f->time_next_packet only if this qdisc enforces
60662306a36Sopenharmony_ci	 * a flow max rate.
60762306a36Sopenharmony_ci	 */
60862306a36Sopenharmony_ci	if (!skb->tstamp) {
60962306a36Sopenharmony_ci		if (skb->sk)
61062306a36Sopenharmony_ci			rate = min(skb->sk->sk_pacing_rate, rate);
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci		if (rate <= q->low_rate_threshold) {
61362306a36Sopenharmony_ci			f->credit = 0;
61462306a36Sopenharmony_ci		} else {
61562306a36Sopenharmony_ci			plen = max(plen, q->quantum);
61662306a36Sopenharmony_ci			if (f->credit > 0)
61762306a36Sopenharmony_ci				goto out;
61862306a36Sopenharmony_ci		}
61962306a36Sopenharmony_ci	}
62062306a36Sopenharmony_ci	if (rate != ~0UL) {
62162306a36Sopenharmony_ci		u64 len = (u64)plen * NSEC_PER_SEC;
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci		if (likely(rate))
62462306a36Sopenharmony_ci			len = div64_ul(len, rate);
62562306a36Sopenharmony_ci		/* Since socket rate can change later,
62662306a36Sopenharmony_ci		 * clamp the delay to 1 second.
62762306a36Sopenharmony_ci		 * Really, providers of too big packets should be fixed !
62862306a36Sopenharmony_ci		 */
62962306a36Sopenharmony_ci		if (unlikely(len > NSEC_PER_SEC)) {
63062306a36Sopenharmony_ci			len = NSEC_PER_SEC;
63162306a36Sopenharmony_ci			q->stat_pkts_too_long++;
63262306a36Sopenharmony_ci		}
63362306a36Sopenharmony_ci		/* Account for schedule/timers drifts.
63462306a36Sopenharmony_ci		 * f->time_next_packet was set when prior packet was sent,
63562306a36Sopenharmony_ci		 * and current time (@now) can be too late by tens of us.
63662306a36Sopenharmony_ci		 */
63762306a36Sopenharmony_ci		if (f->time_next_packet)
63862306a36Sopenharmony_ci			len -= min(len/2, now - f->time_next_packet);
63962306a36Sopenharmony_ci		f->time_next_packet = now + len;
64062306a36Sopenharmony_ci	}
64162306a36Sopenharmony_ciout:
64262306a36Sopenharmony_ci	qdisc_bstats_update(sch, skb);
64362306a36Sopenharmony_ci	return skb;
64462306a36Sopenharmony_ci}
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_cistatic void fq_flow_purge(struct fq_flow *flow)
64762306a36Sopenharmony_ci{
64862306a36Sopenharmony_ci	struct rb_node *p = rb_first(&flow->t_root);
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci	while (p) {
65162306a36Sopenharmony_ci		struct sk_buff *skb = rb_to_skb(p);
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci		p = rb_next(p);
65462306a36Sopenharmony_ci		rb_erase(&skb->rbnode, &flow->t_root);
65562306a36Sopenharmony_ci		rtnl_kfree_skbs(skb, skb);
65662306a36Sopenharmony_ci	}
65762306a36Sopenharmony_ci	rtnl_kfree_skbs(flow->head, flow->tail);
65862306a36Sopenharmony_ci	flow->head = NULL;
65962306a36Sopenharmony_ci	flow->qlen = 0;
66062306a36Sopenharmony_ci}
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_cistatic void fq_reset(struct Qdisc *sch)
66362306a36Sopenharmony_ci{
66462306a36Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
66562306a36Sopenharmony_ci	struct rb_root *root;
66662306a36Sopenharmony_ci	struct rb_node *p;
66762306a36Sopenharmony_ci	struct fq_flow *f;
66862306a36Sopenharmony_ci	unsigned int idx;
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_ci	sch->q.qlen = 0;
67162306a36Sopenharmony_ci	sch->qstats.backlog = 0;
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_ci	fq_flow_purge(&q->internal);
67462306a36Sopenharmony_ci
67562306a36Sopenharmony_ci	if (!q->fq_root)
67662306a36Sopenharmony_ci		return;
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
67962306a36Sopenharmony_ci		root = &q->fq_root[idx];
68062306a36Sopenharmony_ci		while ((p = rb_first(root)) != NULL) {
68162306a36Sopenharmony_ci			f = rb_entry(p, struct fq_flow, fq_node);
68262306a36Sopenharmony_ci			rb_erase(p, root);
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci			fq_flow_purge(f);
68562306a36Sopenharmony_ci
68662306a36Sopenharmony_ci			kmem_cache_free(fq_flow_cachep, f);
68762306a36Sopenharmony_ci		}
68862306a36Sopenharmony_ci	}
68962306a36Sopenharmony_ci	q->new_flows.first	= NULL;
69062306a36Sopenharmony_ci	q->old_flows.first	= NULL;
69162306a36Sopenharmony_ci	q->delayed		= RB_ROOT;
69262306a36Sopenharmony_ci	q->flows		= 0;
69362306a36Sopenharmony_ci	q->inactive_flows	= 0;
69462306a36Sopenharmony_ci	q->throttled_flows	= 0;
69562306a36Sopenharmony_ci}
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_cistatic void fq_rehash(struct fq_sched_data *q,
69862306a36Sopenharmony_ci		      struct rb_root *old_array, u32 old_log,
69962306a36Sopenharmony_ci		      struct rb_root *new_array, u32 new_log)
70062306a36Sopenharmony_ci{
70162306a36Sopenharmony_ci	struct rb_node *op, **np, *parent;
70262306a36Sopenharmony_ci	struct rb_root *oroot, *nroot;
70362306a36Sopenharmony_ci	struct fq_flow *of, *nf;
70462306a36Sopenharmony_ci	int fcnt = 0;
70562306a36Sopenharmony_ci	u32 idx;
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_ci	for (idx = 0; idx < (1U << old_log); idx++) {
70862306a36Sopenharmony_ci		oroot = &old_array[idx];
70962306a36Sopenharmony_ci		while ((op = rb_first(oroot)) != NULL) {
71062306a36Sopenharmony_ci			rb_erase(op, oroot);
71162306a36Sopenharmony_ci			of = rb_entry(op, struct fq_flow, fq_node);
71262306a36Sopenharmony_ci			if (fq_gc_candidate(of)) {
71362306a36Sopenharmony_ci				fcnt++;
71462306a36Sopenharmony_ci				kmem_cache_free(fq_flow_cachep, of);
71562306a36Sopenharmony_ci				continue;
71662306a36Sopenharmony_ci			}
71762306a36Sopenharmony_ci			nroot = &new_array[hash_ptr(of->sk, new_log)];
71862306a36Sopenharmony_ci
71962306a36Sopenharmony_ci			np = &nroot->rb_node;
72062306a36Sopenharmony_ci			parent = NULL;
72162306a36Sopenharmony_ci			while (*np) {
72262306a36Sopenharmony_ci				parent = *np;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci				nf = rb_entry(parent, struct fq_flow, fq_node);
72562306a36Sopenharmony_ci				BUG_ON(nf->sk == of->sk);
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci				if (nf->sk > of->sk)
72862306a36Sopenharmony_ci					np = &parent->rb_right;
72962306a36Sopenharmony_ci				else
73062306a36Sopenharmony_ci					np = &parent->rb_left;
73162306a36Sopenharmony_ci			}
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci			rb_link_node(&of->fq_node, parent, np);
73462306a36Sopenharmony_ci			rb_insert_color(&of->fq_node, nroot);
73562306a36Sopenharmony_ci		}
73662306a36Sopenharmony_ci	}
73762306a36Sopenharmony_ci	q->flows -= fcnt;
73862306a36Sopenharmony_ci	q->inactive_flows -= fcnt;
73962306a36Sopenharmony_ci	q->stat_gc_flows += fcnt;
74062306a36Sopenharmony_ci}
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_cistatic void fq_free(void *addr)
74362306a36Sopenharmony_ci{
74462306a36Sopenharmony_ci	kvfree(addr);
74562306a36Sopenharmony_ci}
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_cistatic int fq_resize(struct Qdisc *sch, u32 log)
74862306a36Sopenharmony_ci{
74962306a36Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
75062306a36Sopenharmony_ci	struct rb_root *array;
75162306a36Sopenharmony_ci	void *old_fq_root;
75262306a36Sopenharmony_ci	u32 idx;
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci	if (q->fq_root && log == q->fq_trees_log)
75562306a36Sopenharmony_ci		return 0;
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci	/* If XPS was setup, we can allocate memory on right NUMA node */
75862306a36Sopenharmony_ci	array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_RETRY_MAYFAIL,
75962306a36Sopenharmony_ci			      netdev_queue_numa_node_read(sch->dev_queue));
76062306a36Sopenharmony_ci	if (!array)
76162306a36Sopenharmony_ci		return -ENOMEM;
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	for (idx = 0; idx < (1U << log); idx++)
76462306a36Sopenharmony_ci		array[idx] = RB_ROOT;
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_ci	sch_tree_lock(sch);
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_ci	old_fq_root = q->fq_root;
76962306a36Sopenharmony_ci	if (old_fq_root)
77062306a36Sopenharmony_ci		fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_ci	q->fq_root = array;
77362306a36Sopenharmony_ci	q->fq_trees_log = log;
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	sch_tree_unlock(sch);
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ci	fq_free(old_fq_root);
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci	return 0;
78062306a36Sopenharmony_ci}
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_cistatic struct netlink_range_validation iq_range = {
78362306a36Sopenharmony_ci	.max = INT_MAX,
78462306a36Sopenharmony_ci};
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_cistatic const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
78762306a36Sopenharmony_ci	[TCA_FQ_UNSPEC]			= { .strict_start_type = TCA_FQ_TIMER_SLACK },
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci	[TCA_FQ_PLIMIT]			= { .type = NLA_U32 },
79062306a36Sopenharmony_ci	[TCA_FQ_FLOW_PLIMIT]		= { .type = NLA_U32 },
79162306a36Sopenharmony_ci	[TCA_FQ_QUANTUM]		= { .type = NLA_U32 },
79262306a36Sopenharmony_ci	[TCA_FQ_INITIAL_QUANTUM]	= NLA_POLICY_FULL_RANGE(NLA_U32, &iq_range),
79362306a36Sopenharmony_ci	[TCA_FQ_RATE_ENABLE]		= { .type = NLA_U32 },
79462306a36Sopenharmony_ci	[TCA_FQ_FLOW_DEFAULT_RATE]	= { .type = NLA_U32 },
79562306a36Sopenharmony_ci	[TCA_FQ_FLOW_MAX_RATE]		= { .type = NLA_U32 },
79662306a36Sopenharmony_ci	[TCA_FQ_BUCKETS_LOG]		= { .type = NLA_U32 },
79762306a36Sopenharmony_ci	[TCA_FQ_FLOW_REFILL_DELAY]	= { .type = NLA_U32 },
79862306a36Sopenharmony_ci	[TCA_FQ_ORPHAN_MASK]		= { .type = NLA_U32 },
79962306a36Sopenharmony_ci	[TCA_FQ_LOW_RATE_THRESHOLD]	= { .type = NLA_U32 },
80062306a36Sopenharmony_ci	[TCA_FQ_CE_THRESHOLD]		= { .type = NLA_U32 },
80162306a36Sopenharmony_ci	[TCA_FQ_TIMER_SLACK]		= { .type = NLA_U32 },
80262306a36Sopenharmony_ci	[TCA_FQ_HORIZON]		= { .type = NLA_U32 },
80362306a36Sopenharmony_ci	[TCA_FQ_HORIZON_DROP]		= { .type = NLA_U8 },
80462306a36Sopenharmony_ci};
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_cistatic int fq_change(struct Qdisc *sch, struct nlattr *opt,
80762306a36Sopenharmony_ci		     struct netlink_ext_ack *extack)
80862306a36Sopenharmony_ci{
80962306a36Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
81062306a36Sopenharmony_ci	struct nlattr *tb[TCA_FQ_MAX + 1];
81162306a36Sopenharmony_ci	int err, drop_count = 0;
81262306a36Sopenharmony_ci	unsigned drop_len = 0;
81362306a36Sopenharmony_ci	u32 fq_log;
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci	err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy,
81662306a36Sopenharmony_ci					  NULL);
81762306a36Sopenharmony_ci	if (err < 0)
81862306a36Sopenharmony_ci		return err;
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci	sch_tree_lock(sch);
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	fq_log = q->fq_trees_log;
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	if (tb[TCA_FQ_BUCKETS_LOG]) {
82562306a36Sopenharmony_ci		u32 nval = nla_get_u32(tb[TCA_FQ_BUCKETS_LOG]);
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci		if (nval >= 1 && nval <= ilog2(256*1024))
82862306a36Sopenharmony_ci			fq_log = nval;
82962306a36Sopenharmony_ci		else
83062306a36Sopenharmony_ci			err = -EINVAL;
83162306a36Sopenharmony_ci	}
83262306a36Sopenharmony_ci	if (tb[TCA_FQ_PLIMIT])
83362306a36Sopenharmony_ci		sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]);
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	if (tb[TCA_FQ_FLOW_PLIMIT])
83662306a36Sopenharmony_ci		q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	if (tb[TCA_FQ_QUANTUM]) {
83962306a36Sopenharmony_ci		u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci		if (quantum > 0 && quantum <= (1 << 20)) {
84262306a36Sopenharmony_ci			q->quantum = quantum;
84362306a36Sopenharmony_ci		} else {
84462306a36Sopenharmony_ci			NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
84562306a36Sopenharmony_ci			err = -EINVAL;
84662306a36Sopenharmony_ci		}
84762306a36Sopenharmony_ci	}
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	if (tb[TCA_FQ_INITIAL_QUANTUM])
85062306a36Sopenharmony_ci		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
85362306a36Sopenharmony_ci		pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
85462306a36Sopenharmony_ci				    nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]));
85562306a36Sopenharmony_ci
85662306a36Sopenharmony_ci	if (tb[TCA_FQ_FLOW_MAX_RATE]) {
85762306a36Sopenharmony_ci		u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
85862306a36Sopenharmony_ci
85962306a36Sopenharmony_ci		q->flow_max_rate = (rate == ~0U) ? ~0UL : rate;
86062306a36Sopenharmony_ci	}
86162306a36Sopenharmony_ci	if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
86262306a36Sopenharmony_ci		q->low_rate_threshold =
86362306a36Sopenharmony_ci			nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci	if (tb[TCA_FQ_RATE_ENABLE]) {
86662306a36Sopenharmony_ci		u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
86762306a36Sopenharmony_ci
86862306a36Sopenharmony_ci		if (enable <= 1)
86962306a36Sopenharmony_ci			q->rate_enable = enable;
87062306a36Sopenharmony_ci		else
87162306a36Sopenharmony_ci			err = -EINVAL;
87262306a36Sopenharmony_ci	}
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci	if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
87562306a36Sopenharmony_ci		u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
87662306a36Sopenharmony_ci
87762306a36Sopenharmony_ci		q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
87862306a36Sopenharmony_ci	}
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	if (tb[TCA_FQ_ORPHAN_MASK])
88162306a36Sopenharmony_ci		q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_ci	if (tb[TCA_FQ_CE_THRESHOLD])
88462306a36Sopenharmony_ci		q->ce_threshold = (u64)NSEC_PER_USEC *
88562306a36Sopenharmony_ci				  nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
88662306a36Sopenharmony_ci
88762306a36Sopenharmony_ci	if (tb[TCA_FQ_TIMER_SLACK])
88862306a36Sopenharmony_ci		q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci	if (tb[TCA_FQ_HORIZON])
89162306a36Sopenharmony_ci		q->horizon = (u64)NSEC_PER_USEC *
89262306a36Sopenharmony_ci				  nla_get_u32(tb[TCA_FQ_HORIZON]);
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	if (tb[TCA_FQ_HORIZON_DROP])
89562306a36Sopenharmony_ci		q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	if (!err) {
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_ci		sch_tree_unlock(sch);
90062306a36Sopenharmony_ci		err = fq_resize(sch, fq_log);
90162306a36Sopenharmony_ci		sch_tree_lock(sch);
90262306a36Sopenharmony_ci	}
90362306a36Sopenharmony_ci	while (sch->q.qlen > sch->limit) {
90462306a36Sopenharmony_ci		struct sk_buff *skb = fq_dequeue(sch);
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci		if (!skb)
90762306a36Sopenharmony_ci			break;
90862306a36Sopenharmony_ci		drop_len += qdisc_pkt_len(skb);
90962306a36Sopenharmony_ci		rtnl_kfree_skbs(skb, skb);
91062306a36Sopenharmony_ci		drop_count++;
91162306a36Sopenharmony_ci	}
91262306a36Sopenharmony_ci	qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci	sch_tree_unlock(sch);
91562306a36Sopenharmony_ci	return err;
91662306a36Sopenharmony_ci}
91762306a36Sopenharmony_ci
91862306a36Sopenharmony_cistatic void fq_destroy(struct Qdisc *sch)
91962306a36Sopenharmony_ci{
92062306a36Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
92162306a36Sopenharmony_ci
92262306a36Sopenharmony_ci	fq_reset(sch);
92362306a36Sopenharmony_ci	fq_free(q->fq_root);
92462306a36Sopenharmony_ci	qdisc_watchdog_cancel(&q->watchdog);
92562306a36Sopenharmony_ci}
92662306a36Sopenharmony_ci
92762306a36Sopenharmony_cistatic int fq_init(struct Qdisc *sch, struct nlattr *opt,
92862306a36Sopenharmony_ci		   struct netlink_ext_ack *extack)
92962306a36Sopenharmony_ci{
93062306a36Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
93162306a36Sopenharmony_ci	int err;
93262306a36Sopenharmony_ci
93362306a36Sopenharmony_ci	sch->limit		= 10000;
93462306a36Sopenharmony_ci	q->flow_plimit		= 100;
93562306a36Sopenharmony_ci	q->quantum		= 2 * psched_mtu(qdisc_dev(sch));
93662306a36Sopenharmony_ci	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
93762306a36Sopenharmony_ci	q->flow_refill_delay	= msecs_to_jiffies(40);
93862306a36Sopenharmony_ci	q->flow_max_rate	= ~0UL;
93962306a36Sopenharmony_ci	q->time_next_delayed_flow = ~0ULL;
94062306a36Sopenharmony_ci	q->rate_enable		= 1;
94162306a36Sopenharmony_ci	q->new_flows.first	= NULL;
94262306a36Sopenharmony_ci	q->old_flows.first	= NULL;
94362306a36Sopenharmony_ci	q->delayed		= RB_ROOT;
94462306a36Sopenharmony_ci	q->fq_root		= NULL;
94562306a36Sopenharmony_ci	q->fq_trees_log		= ilog2(1024);
94662306a36Sopenharmony_ci	q->orphan_mask		= 1024 - 1;
94762306a36Sopenharmony_ci	q->low_rate_threshold	= 550000 / 8;
94862306a36Sopenharmony_ci
94962306a36Sopenharmony_ci	q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_ci	q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds */
95262306a36Sopenharmony_ci	q->horizon_drop = 1; /* by default, drop packets beyond horizon */
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ci	/* Default ce_threshold of 4294 seconds */
95562306a36Sopenharmony_ci	q->ce_threshold		= (u64)NSEC_PER_USEC * ~0U;
95662306a36Sopenharmony_ci
95762306a36Sopenharmony_ci	qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci	if (opt)
96062306a36Sopenharmony_ci		err = fq_change(sch, opt, extack);
96162306a36Sopenharmony_ci	else
96262306a36Sopenharmony_ci		err = fq_resize(sch, q->fq_trees_log);
96362306a36Sopenharmony_ci
96462306a36Sopenharmony_ci	return err;
96562306a36Sopenharmony_ci}
96662306a36Sopenharmony_ci
96762306a36Sopenharmony_cistatic int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
96862306a36Sopenharmony_ci{
96962306a36Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
97062306a36Sopenharmony_ci	u64 ce_threshold = q->ce_threshold;
97162306a36Sopenharmony_ci	u64 horizon = q->horizon;
97262306a36Sopenharmony_ci	struct nlattr *opts;
97362306a36Sopenharmony_ci
97462306a36Sopenharmony_ci	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
97562306a36Sopenharmony_ci	if (opts == NULL)
97662306a36Sopenharmony_ci		goto nla_put_failure;
97762306a36Sopenharmony_ci
97862306a36Sopenharmony_ci	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci	do_div(ce_threshold, NSEC_PER_USEC);
98162306a36Sopenharmony_ci	do_div(horizon, NSEC_PER_USEC);
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
98462306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
98562306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
98662306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
98762306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
98862306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE,
98962306a36Sopenharmony_ci			min_t(unsigned long, q->flow_max_rate, ~0U)) ||
99062306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
99162306a36Sopenharmony_ci			jiffies_to_usecs(q->flow_refill_delay)) ||
99262306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
99362306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
99462306a36Sopenharmony_ci			q->low_rate_threshold) ||
99562306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
99662306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
99762306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
99862306a36Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
99962306a36Sopenharmony_ci	    nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
100062306a36Sopenharmony_ci		goto nla_put_failure;
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_ci	return nla_nest_end(skb, opts);
100362306a36Sopenharmony_ci
100462306a36Sopenharmony_cinla_put_failure:
100562306a36Sopenharmony_ci	return -1;
100662306a36Sopenharmony_ci}
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_cistatic int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
100962306a36Sopenharmony_ci{
101062306a36Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
101162306a36Sopenharmony_ci	struct tc_fq_qd_stats st;
101262306a36Sopenharmony_ci
101362306a36Sopenharmony_ci	sch_tree_lock(sch);
101462306a36Sopenharmony_ci
101562306a36Sopenharmony_ci	st.gc_flows		  = q->stat_gc_flows;
101662306a36Sopenharmony_ci	st.highprio_packets	  = q->stat_internal_packets;
101762306a36Sopenharmony_ci	st.tcp_retrans		  = 0;
101862306a36Sopenharmony_ci	st.throttled		  = q->stat_throttled;
101962306a36Sopenharmony_ci	st.flows_plimit		  = q->stat_flows_plimit;
102062306a36Sopenharmony_ci	st.pkts_too_long	  = q->stat_pkts_too_long;
102162306a36Sopenharmony_ci	st.allocation_errors	  = q->stat_allocation_errors;
102262306a36Sopenharmony_ci	st.time_next_delayed_flow = q->time_next_delayed_flow + q->timer_slack -
102362306a36Sopenharmony_ci				    ktime_get_ns();
102462306a36Sopenharmony_ci	st.flows		  = q->flows;
102562306a36Sopenharmony_ci	st.inactive_flows	  = q->inactive_flows;
102662306a36Sopenharmony_ci	st.throttled_flows	  = q->throttled_flows;
102762306a36Sopenharmony_ci	st.unthrottle_latency_ns  = min_t(unsigned long,
102862306a36Sopenharmony_ci					  q->unthrottle_latency_ns, ~0U);
102962306a36Sopenharmony_ci	st.ce_mark		  = q->stat_ce_mark;
103062306a36Sopenharmony_ci	st.horizon_drops	  = q->stat_horizon_drops;
103162306a36Sopenharmony_ci	st.horizon_caps		  = q->stat_horizon_caps;
103262306a36Sopenharmony_ci	sch_tree_unlock(sch);
103362306a36Sopenharmony_ci
103462306a36Sopenharmony_ci	return gnet_stats_copy_app(d, &st, sizeof(st));
103562306a36Sopenharmony_ci}
103662306a36Sopenharmony_ci
103762306a36Sopenharmony_cistatic struct Qdisc_ops fq_qdisc_ops __read_mostly = {
103862306a36Sopenharmony_ci	.id		=	"fq",
103962306a36Sopenharmony_ci	.priv_size	=	sizeof(struct fq_sched_data),
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci	.enqueue	=	fq_enqueue,
104262306a36Sopenharmony_ci	.dequeue	=	fq_dequeue,
104362306a36Sopenharmony_ci	.peek		=	qdisc_peek_dequeued,
104462306a36Sopenharmony_ci	.init		=	fq_init,
104562306a36Sopenharmony_ci	.reset		=	fq_reset,
104662306a36Sopenharmony_ci	.destroy	=	fq_destroy,
104762306a36Sopenharmony_ci	.change		=	fq_change,
104862306a36Sopenharmony_ci	.dump		=	fq_dump,
104962306a36Sopenharmony_ci	.dump_stats	=	fq_dump_stats,
105062306a36Sopenharmony_ci	.owner		=	THIS_MODULE,
105162306a36Sopenharmony_ci};
105262306a36Sopenharmony_ci
105362306a36Sopenharmony_cistatic int __init fq_module_init(void)
105462306a36Sopenharmony_ci{
105562306a36Sopenharmony_ci	int ret;
105662306a36Sopenharmony_ci
105762306a36Sopenharmony_ci	fq_flow_cachep = kmem_cache_create("fq_flow_cache",
105862306a36Sopenharmony_ci					   sizeof(struct fq_flow),
105962306a36Sopenharmony_ci					   0, 0, NULL);
106062306a36Sopenharmony_ci	if (!fq_flow_cachep)
106162306a36Sopenharmony_ci		return -ENOMEM;
106262306a36Sopenharmony_ci
106362306a36Sopenharmony_ci	ret = register_qdisc(&fq_qdisc_ops);
106462306a36Sopenharmony_ci	if (ret)
106562306a36Sopenharmony_ci		kmem_cache_destroy(fq_flow_cachep);
106662306a36Sopenharmony_ci	return ret;
106762306a36Sopenharmony_ci}
106862306a36Sopenharmony_ci
106962306a36Sopenharmony_cistatic void __exit fq_module_exit(void)
107062306a36Sopenharmony_ci{
107162306a36Sopenharmony_ci	unregister_qdisc(&fq_qdisc_ops);
107262306a36Sopenharmony_ci	kmem_cache_destroy(fq_flow_cachep);
107362306a36Sopenharmony_ci}
107462306a36Sopenharmony_ci
107562306a36Sopenharmony_cimodule_init(fq_module_init)
107662306a36Sopenharmony_cimodule_exit(fq_module_exit)
107762306a36Sopenharmony_ciMODULE_AUTHOR("Eric Dumazet");
107862306a36Sopenharmony_ciMODULE_LICENSE("GPL");
107962306a36Sopenharmony_ciMODULE_DESCRIPTION("Fair Queue Packet Scheduler");
1080