xref: /kernel/linux/linux-5.10/net/sched/sch_fq.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci *  Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com>
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci *  Meant to be mostly used for locally generated traffic :
88c2ecf20Sopenharmony_ci *  Fast classification depends on skb->sk being set before reaching us.
98c2ecf20Sopenharmony_ci *  If not, (router workload), we use rxhash as fallback, with 32 bits wide hash.
108c2ecf20Sopenharmony_ci *  All packets belonging to a socket are considered as a 'flow'.
118c2ecf20Sopenharmony_ci *
128c2ecf20Sopenharmony_ci *  Flows are dynamically allocated and stored in a hash table of RB trees
138c2ecf20Sopenharmony_ci *  They are also part of one Round Robin 'queues' (new or old flows)
148c2ecf20Sopenharmony_ci *
158c2ecf20Sopenharmony_ci *  Burst avoidance (aka pacing) capability :
168c2ecf20Sopenharmony_ci *
178c2ecf20Sopenharmony_ci *  Transport (eg TCP) can set in sk->sk_pacing_rate a rate, enqueue a
188c2ecf20Sopenharmony_ci *  bunch of packets, and this packet scheduler adds delay between
198c2ecf20Sopenharmony_ci *  packets to respect rate limitation.
208c2ecf20Sopenharmony_ci *
218c2ecf20Sopenharmony_ci *  enqueue() :
228c2ecf20Sopenharmony_ci *   - lookup one RB tree (out of 1024 or more) to find the flow.
238c2ecf20Sopenharmony_ci *     If non existent flow, create it, add it to the tree.
248c2ecf20Sopenharmony_ci *     Add skb to the per flow list of skb (fifo).
258c2ecf20Sopenharmony_ci *   - Use a special fifo for high prio packets
268c2ecf20Sopenharmony_ci *
278c2ecf20Sopenharmony_ci *  dequeue() : serves flows in Round Robin
288c2ecf20Sopenharmony_ci *  Note : When a flow becomes empty, we do not immediately remove it from
298c2ecf20Sopenharmony_ci *  rb trees, for performance reasons (its expected to send additional packets,
308c2ecf20Sopenharmony_ci *  or SLAB cache will reuse socket for another flow)
318c2ecf20Sopenharmony_ci */
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci#include <linux/module.h>
348c2ecf20Sopenharmony_ci#include <linux/types.h>
358c2ecf20Sopenharmony_ci#include <linux/kernel.h>
368c2ecf20Sopenharmony_ci#include <linux/jiffies.h>
378c2ecf20Sopenharmony_ci#include <linux/string.h>
388c2ecf20Sopenharmony_ci#include <linux/in.h>
398c2ecf20Sopenharmony_ci#include <linux/errno.h>
408c2ecf20Sopenharmony_ci#include <linux/init.h>
418c2ecf20Sopenharmony_ci#include <linux/skbuff.h>
428c2ecf20Sopenharmony_ci#include <linux/slab.h>
438c2ecf20Sopenharmony_ci#include <linux/rbtree.h>
448c2ecf20Sopenharmony_ci#include <linux/hash.h>
458c2ecf20Sopenharmony_ci#include <linux/prefetch.h>
468c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
478c2ecf20Sopenharmony_ci#include <net/netlink.h>
488c2ecf20Sopenharmony_ci#include <net/pkt_sched.h>
498c2ecf20Sopenharmony_ci#include <net/sock.h>
508c2ecf20Sopenharmony_ci#include <net/tcp_states.h>
518c2ecf20Sopenharmony_ci#include <net/tcp.h>
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_cistruct fq_skb_cb {
548c2ecf20Sopenharmony_ci	u64	        time_to_send;
558c2ecf20Sopenharmony_ci};
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_cistatic inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb)
588c2ecf20Sopenharmony_ci{
598c2ecf20Sopenharmony_ci	qdisc_cb_private_validate(skb, sizeof(struct fq_skb_cb));
608c2ecf20Sopenharmony_ci	return (struct fq_skb_cb *)qdisc_skb_cb(skb)->data;
618c2ecf20Sopenharmony_ci}
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci/*
648c2ecf20Sopenharmony_ci * Per flow structure, dynamically allocated.
658c2ecf20Sopenharmony_ci * If packets have monotically increasing time_to_send, they are placed in O(1)
668c2ecf20Sopenharmony_ci * in linear list (head,tail), otherwise are placed in a rbtree (t_root).
678c2ecf20Sopenharmony_ci */
688c2ecf20Sopenharmony_cistruct fq_flow {
698c2ecf20Sopenharmony_ci/* First cache line : used in fq_gc(), fq_enqueue(), fq_dequeue() */
708c2ecf20Sopenharmony_ci	struct rb_root	t_root;
718c2ecf20Sopenharmony_ci	struct sk_buff	*head;		/* list of skbs for this flow : first skb */
728c2ecf20Sopenharmony_ci	union {
738c2ecf20Sopenharmony_ci		struct sk_buff *tail;	/* last skb in the list */
748c2ecf20Sopenharmony_ci		unsigned long  age;	/* (jiffies | 1UL) when flow was emptied, for gc */
758c2ecf20Sopenharmony_ci	};
768c2ecf20Sopenharmony_ci	struct rb_node	fq_node;	/* anchor in fq_root[] trees */
778c2ecf20Sopenharmony_ci	struct sock	*sk;
788c2ecf20Sopenharmony_ci	u32		socket_hash;	/* sk_hash */
798c2ecf20Sopenharmony_ci	int		qlen;		/* number of packets in flow queue */
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_ci/* Second cache line, used in fq_dequeue() */
828c2ecf20Sopenharmony_ci	int		credit;
838c2ecf20Sopenharmony_ci	/* 32bit hole on 64bit arches */
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci	struct fq_flow *next;		/* next pointer in RR lists */
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci	struct rb_node  rate_node;	/* anchor in q->delayed tree */
888c2ecf20Sopenharmony_ci	u64		time_next_packet;
898c2ecf20Sopenharmony_ci} ____cacheline_aligned_in_smp;
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_cistruct fq_flow_head {
928c2ecf20Sopenharmony_ci	struct fq_flow *first;
938c2ecf20Sopenharmony_ci	struct fq_flow *last;
948c2ecf20Sopenharmony_ci};
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_cistruct fq_sched_data {
978c2ecf20Sopenharmony_ci	struct fq_flow_head new_flows;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	struct fq_flow_head old_flows;
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	struct rb_root	delayed;	/* for rate limited flows */
1028c2ecf20Sopenharmony_ci	u64		time_next_delayed_flow;
1038c2ecf20Sopenharmony_ci	u64		ktime_cache;	/* copy of last ktime_get_ns() */
1048c2ecf20Sopenharmony_ci	unsigned long	unthrottle_latency_ns;
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci	struct fq_flow	internal;	/* for non classified or high prio packets */
1078c2ecf20Sopenharmony_ci	u32		quantum;
1088c2ecf20Sopenharmony_ci	u32		initial_quantum;
1098c2ecf20Sopenharmony_ci	u32		flow_refill_delay;
1108c2ecf20Sopenharmony_ci	u32		flow_plimit;	/* max packets per flow */
1118c2ecf20Sopenharmony_ci	unsigned long	flow_max_rate;	/* optional max rate per flow */
1128c2ecf20Sopenharmony_ci	u64		ce_threshold;
1138c2ecf20Sopenharmony_ci	u64		horizon;	/* horizon in ns */
1148c2ecf20Sopenharmony_ci	u32		orphan_mask;	/* mask for orphaned skb */
1158c2ecf20Sopenharmony_ci	u32		low_rate_threshold;
1168c2ecf20Sopenharmony_ci	struct rb_root	*fq_root;
1178c2ecf20Sopenharmony_ci	u8		rate_enable;
1188c2ecf20Sopenharmony_ci	u8		fq_trees_log;
1198c2ecf20Sopenharmony_ci	u8		horizon_drop;
1208c2ecf20Sopenharmony_ci	u32		flows;
1218c2ecf20Sopenharmony_ci	u32		inactive_flows;
1228c2ecf20Sopenharmony_ci	u32		throttled_flows;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	u64		stat_gc_flows;
1258c2ecf20Sopenharmony_ci	u64		stat_internal_packets;
1268c2ecf20Sopenharmony_ci	u64		stat_throttled;
1278c2ecf20Sopenharmony_ci	u64		stat_ce_mark;
1288c2ecf20Sopenharmony_ci	u64		stat_horizon_drops;
1298c2ecf20Sopenharmony_ci	u64		stat_horizon_caps;
1308c2ecf20Sopenharmony_ci	u64		stat_flows_plimit;
1318c2ecf20Sopenharmony_ci	u64		stat_pkts_too_long;
1328c2ecf20Sopenharmony_ci	u64		stat_allocation_errors;
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci	u32		timer_slack; /* hrtimer slack in ns */
1358c2ecf20Sopenharmony_ci	struct qdisc_watchdog watchdog;
1368c2ecf20Sopenharmony_ci};
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci/*
1398c2ecf20Sopenharmony_ci * f->tail and f->age share the same location.
1408c2ecf20Sopenharmony_ci * We can use the low order bit to differentiate if this location points
1418c2ecf20Sopenharmony_ci * to a sk_buff or contains a jiffies value, if we force this value to be odd.
1428c2ecf20Sopenharmony_ci * This assumes f->tail low order bit must be 0 since alignof(struct sk_buff) >= 2
1438c2ecf20Sopenharmony_ci */
1448c2ecf20Sopenharmony_cistatic void fq_flow_set_detached(struct fq_flow *f)
1458c2ecf20Sopenharmony_ci{
1468c2ecf20Sopenharmony_ci	f->age = jiffies | 1UL;
1478c2ecf20Sopenharmony_ci}
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_cistatic bool fq_flow_is_detached(const struct fq_flow *f)
1508c2ecf20Sopenharmony_ci{
1518c2ecf20Sopenharmony_ci	return !!(f->age & 1UL);
1528c2ecf20Sopenharmony_ci}
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci/* special value to mark a throttled flow (not on old/new list) */
1558c2ecf20Sopenharmony_cistatic struct fq_flow throttled;
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_cistatic bool fq_flow_is_throttled(const struct fq_flow *f)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	return f->next == &throttled;
1608c2ecf20Sopenharmony_ci}
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_cistatic void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
1638c2ecf20Sopenharmony_ci{
1648c2ecf20Sopenharmony_ci	if (head->first)
1658c2ecf20Sopenharmony_ci		head->last->next = flow;
1668c2ecf20Sopenharmony_ci	else
1678c2ecf20Sopenharmony_ci		head->first = flow;
1688c2ecf20Sopenharmony_ci	head->last = flow;
1698c2ecf20Sopenharmony_ci	flow->next = NULL;
1708c2ecf20Sopenharmony_ci}
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_cistatic void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f)
1738c2ecf20Sopenharmony_ci{
1748c2ecf20Sopenharmony_ci	rb_erase(&f->rate_node, &q->delayed);
1758c2ecf20Sopenharmony_ci	q->throttled_flows--;
1768c2ecf20Sopenharmony_ci	fq_flow_add_tail(&q->old_flows, f);
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_cistatic void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
1808c2ecf20Sopenharmony_ci{
1818c2ecf20Sopenharmony_ci	struct rb_node **p = &q->delayed.rb_node, *parent = NULL;
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	while (*p) {
1848c2ecf20Sopenharmony_ci		struct fq_flow *aux;
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci		parent = *p;
1878c2ecf20Sopenharmony_ci		aux = rb_entry(parent, struct fq_flow, rate_node);
1888c2ecf20Sopenharmony_ci		if (f->time_next_packet >= aux->time_next_packet)
1898c2ecf20Sopenharmony_ci			p = &parent->rb_right;
1908c2ecf20Sopenharmony_ci		else
1918c2ecf20Sopenharmony_ci			p = &parent->rb_left;
1928c2ecf20Sopenharmony_ci	}
1938c2ecf20Sopenharmony_ci	rb_link_node(&f->rate_node, parent, p);
1948c2ecf20Sopenharmony_ci	rb_insert_color(&f->rate_node, &q->delayed);
1958c2ecf20Sopenharmony_ci	q->throttled_flows++;
1968c2ecf20Sopenharmony_ci	q->stat_throttled++;
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	f->next = &throttled;
1998c2ecf20Sopenharmony_ci	if (q->time_next_delayed_flow > f->time_next_packet)
2008c2ecf20Sopenharmony_ci		q->time_next_delayed_flow = f->time_next_packet;
2018c2ecf20Sopenharmony_ci}
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_cistatic struct kmem_cache *fq_flow_cachep __read_mostly;
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci/* limit number of collected flows per round */
2088c2ecf20Sopenharmony_ci#define FQ_GC_MAX 8
2098c2ecf20Sopenharmony_ci#define FQ_GC_AGE (3*HZ)
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_cistatic bool fq_gc_candidate(const struct fq_flow *f)
2128c2ecf20Sopenharmony_ci{
2138c2ecf20Sopenharmony_ci	return fq_flow_is_detached(f) &&
2148c2ecf20Sopenharmony_ci	       time_after(jiffies, f->age + FQ_GC_AGE);
2158c2ecf20Sopenharmony_ci}
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_cistatic void fq_gc(struct fq_sched_data *q,
2188c2ecf20Sopenharmony_ci		  struct rb_root *root,
2198c2ecf20Sopenharmony_ci		  struct sock *sk)
2208c2ecf20Sopenharmony_ci{
2218c2ecf20Sopenharmony_ci	struct rb_node **p, *parent;
2228c2ecf20Sopenharmony_ci	void *tofree[FQ_GC_MAX];
2238c2ecf20Sopenharmony_ci	struct fq_flow *f;
2248c2ecf20Sopenharmony_ci	int i, fcnt = 0;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	p = &root->rb_node;
2278c2ecf20Sopenharmony_ci	parent = NULL;
2288c2ecf20Sopenharmony_ci	while (*p) {
2298c2ecf20Sopenharmony_ci		parent = *p;
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci		f = rb_entry(parent, struct fq_flow, fq_node);
2328c2ecf20Sopenharmony_ci		if (f->sk == sk)
2338c2ecf20Sopenharmony_ci			break;
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_ci		if (fq_gc_candidate(f)) {
2368c2ecf20Sopenharmony_ci			tofree[fcnt++] = f;
2378c2ecf20Sopenharmony_ci			if (fcnt == FQ_GC_MAX)
2388c2ecf20Sopenharmony_ci				break;
2398c2ecf20Sopenharmony_ci		}
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci		if (f->sk > sk)
2428c2ecf20Sopenharmony_ci			p = &parent->rb_right;
2438c2ecf20Sopenharmony_ci		else
2448c2ecf20Sopenharmony_ci			p = &parent->rb_left;
2458c2ecf20Sopenharmony_ci	}
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci	if (!fcnt)
2488c2ecf20Sopenharmony_ci		return;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci	for (i = fcnt; i > 0; ) {
2518c2ecf20Sopenharmony_ci		f = tofree[--i];
2528c2ecf20Sopenharmony_ci		rb_erase(&f->fq_node, root);
2538c2ecf20Sopenharmony_ci	}
2548c2ecf20Sopenharmony_ci	q->flows -= fcnt;
2558c2ecf20Sopenharmony_ci	q->inactive_flows -= fcnt;
2568c2ecf20Sopenharmony_ci	q->stat_gc_flows += fcnt;
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree);
2598c2ecf20Sopenharmony_ci}
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_cistatic struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
2628c2ecf20Sopenharmony_ci{
2638c2ecf20Sopenharmony_ci	struct rb_node **p, *parent;
2648c2ecf20Sopenharmony_ci	struct sock *sk = skb->sk;
2658c2ecf20Sopenharmony_ci	struct rb_root *root;
2668c2ecf20Sopenharmony_ci	struct fq_flow *f;
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_ci	/* warning: no starvation prevention... */
2698c2ecf20Sopenharmony_ci	if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
2708c2ecf20Sopenharmony_ci		return &q->internal;
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci	/* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket
2738c2ecf20Sopenharmony_ci	 * or a listener (SYNCOOKIE mode)
2748c2ecf20Sopenharmony_ci	 * 1) request sockets are not full blown,
2758c2ecf20Sopenharmony_ci	 *    they do not contain sk_pacing_rate
2768c2ecf20Sopenharmony_ci	 * 2) They are not part of a 'flow' yet
2778c2ecf20Sopenharmony_ci	 * 3) We do not want to rate limit them (eg SYNFLOOD attack),
2788c2ecf20Sopenharmony_ci	 *    especially if the listener set SO_MAX_PACING_RATE
2798c2ecf20Sopenharmony_ci	 * 4) We pretend they are orphaned
2808c2ecf20Sopenharmony_ci	 */
2818c2ecf20Sopenharmony_ci	if (!sk || sk_listener(sk)) {
2828c2ecf20Sopenharmony_ci		unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci		/* By forcing low order bit to 1, we make sure to not
2858c2ecf20Sopenharmony_ci		 * collide with a local flow (socket pointers are word aligned)
2868c2ecf20Sopenharmony_ci		 */
2878c2ecf20Sopenharmony_ci		sk = (struct sock *)((hash << 1) | 1UL);
2888c2ecf20Sopenharmony_ci		skb_orphan(skb);
2898c2ecf20Sopenharmony_ci	} else if (sk->sk_state == TCP_CLOSE) {
2908c2ecf20Sopenharmony_ci		unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
2918c2ecf20Sopenharmony_ci		/*
2928c2ecf20Sopenharmony_ci		 * Sockets in TCP_CLOSE are non connected.
2938c2ecf20Sopenharmony_ci		 * Typical use case is UDP sockets, they can send packets
2948c2ecf20Sopenharmony_ci		 * with sendto() to many different destinations.
2958c2ecf20Sopenharmony_ci		 * We probably could use a generic bit advertising
2968c2ecf20Sopenharmony_ci		 * non connected sockets, instead of sk_state == TCP_CLOSE,
2978c2ecf20Sopenharmony_ci		 * if we care enough.
2988c2ecf20Sopenharmony_ci		 */
2998c2ecf20Sopenharmony_ci		sk = (struct sock *)((hash << 1) | 1UL);
3008c2ecf20Sopenharmony_ci	}
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_ci	root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	if (q->flows >= (2U << q->fq_trees_log) &&
3058c2ecf20Sopenharmony_ci	    q->inactive_flows > q->flows/2)
3068c2ecf20Sopenharmony_ci		fq_gc(q, root, sk);
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_ci	p = &root->rb_node;
3098c2ecf20Sopenharmony_ci	parent = NULL;
3108c2ecf20Sopenharmony_ci	while (*p) {
3118c2ecf20Sopenharmony_ci		parent = *p;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci		f = rb_entry(parent, struct fq_flow, fq_node);
3148c2ecf20Sopenharmony_ci		if (f->sk == sk) {
3158c2ecf20Sopenharmony_ci			/* socket might have been reallocated, so check
3168c2ecf20Sopenharmony_ci			 * if its sk_hash is the same.
3178c2ecf20Sopenharmony_ci			 * It not, we need to refill credit with
3188c2ecf20Sopenharmony_ci			 * initial quantum
3198c2ecf20Sopenharmony_ci			 */
3208c2ecf20Sopenharmony_ci			if (unlikely(skb->sk == sk &&
3218c2ecf20Sopenharmony_ci				     f->socket_hash != sk->sk_hash)) {
3228c2ecf20Sopenharmony_ci				f->credit = q->initial_quantum;
3238c2ecf20Sopenharmony_ci				f->socket_hash = sk->sk_hash;
3248c2ecf20Sopenharmony_ci				if (q->rate_enable)
3258c2ecf20Sopenharmony_ci					smp_store_release(&sk->sk_pacing_status,
3268c2ecf20Sopenharmony_ci							  SK_PACING_FQ);
3278c2ecf20Sopenharmony_ci				if (fq_flow_is_throttled(f))
3288c2ecf20Sopenharmony_ci					fq_flow_unset_throttled(q, f);
3298c2ecf20Sopenharmony_ci				f->time_next_packet = 0ULL;
3308c2ecf20Sopenharmony_ci			}
3318c2ecf20Sopenharmony_ci			return f;
3328c2ecf20Sopenharmony_ci		}
3338c2ecf20Sopenharmony_ci		if (f->sk > sk)
3348c2ecf20Sopenharmony_ci			p = &parent->rb_right;
3358c2ecf20Sopenharmony_ci		else
3368c2ecf20Sopenharmony_ci			p = &parent->rb_left;
3378c2ecf20Sopenharmony_ci	}
3388c2ecf20Sopenharmony_ci
3398c2ecf20Sopenharmony_ci	f = kmem_cache_zalloc(fq_flow_cachep, GFP_ATOMIC | __GFP_NOWARN);
3408c2ecf20Sopenharmony_ci	if (unlikely(!f)) {
3418c2ecf20Sopenharmony_ci		q->stat_allocation_errors++;
3428c2ecf20Sopenharmony_ci		return &q->internal;
3438c2ecf20Sopenharmony_ci	}
3448c2ecf20Sopenharmony_ci	/* f->t_root is already zeroed after kmem_cache_zalloc() */
3458c2ecf20Sopenharmony_ci
3468c2ecf20Sopenharmony_ci	fq_flow_set_detached(f);
3478c2ecf20Sopenharmony_ci	f->sk = sk;
3488c2ecf20Sopenharmony_ci	if (skb->sk == sk) {
3498c2ecf20Sopenharmony_ci		f->socket_hash = sk->sk_hash;
3508c2ecf20Sopenharmony_ci		if (q->rate_enable)
3518c2ecf20Sopenharmony_ci			smp_store_release(&sk->sk_pacing_status,
3528c2ecf20Sopenharmony_ci					  SK_PACING_FQ);
3538c2ecf20Sopenharmony_ci	}
3548c2ecf20Sopenharmony_ci	f->credit = q->initial_quantum;
3558c2ecf20Sopenharmony_ci
3568c2ecf20Sopenharmony_ci	rb_link_node(&f->fq_node, parent, p);
3578c2ecf20Sopenharmony_ci	rb_insert_color(&f->fq_node, root);
3588c2ecf20Sopenharmony_ci
3598c2ecf20Sopenharmony_ci	q->flows++;
3608c2ecf20Sopenharmony_ci	q->inactive_flows++;
3618c2ecf20Sopenharmony_ci	return f;
3628c2ecf20Sopenharmony_ci}
3638c2ecf20Sopenharmony_ci
3648c2ecf20Sopenharmony_cistatic struct sk_buff *fq_peek(struct fq_flow *flow)
3658c2ecf20Sopenharmony_ci{
3668c2ecf20Sopenharmony_ci	struct sk_buff *skb = skb_rb_first(&flow->t_root);
3678c2ecf20Sopenharmony_ci	struct sk_buff *head = flow->head;
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ci	if (!skb)
3708c2ecf20Sopenharmony_ci		return head;
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci	if (!head)
3738c2ecf20Sopenharmony_ci		return skb;
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci	if (fq_skb_cb(skb)->time_to_send < fq_skb_cb(head)->time_to_send)
3768c2ecf20Sopenharmony_ci		return skb;
3778c2ecf20Sopenharmony_ci	return head;
3788c2ecf20Sopenharmony_ci}
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_cistatic void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow,
3818c2ecf20Sopenharmony_ci			  struct sk_buff *skb)
3828c2ecf20Sopenharmony_ci{
3838c2ecf20Sopenharmony_ci	if (skb == flow->head) {
3848c2ecf20Sopenharmony_ci		flow->head = skb->next;
3858c2ecf20Sopenharmony_ci	} else {
3868c2ecf20Sopenharmony_ci		rb_erase(&skb->rbnode, &flow->t_root);
3878c2ecf20Sopenharmony_ci		skb->dev = qdisc_dev(sch);
3888c2ecf20Sopenharmony_ci	}
3898c2ecf20Sopenharmony_ci}
3908c2ecf20Sopenharmony_ci
3918c2ecf20Sopenharmony_ci/* Remove one skb from flow queue.
3928c2ecf20Sopenharmony_ci * This skb must be the return value of prior fq_peek().
3938c2ecf20Sopenharmony_ci */
3948c2ecf20Sopenharmony_cistatic void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow,
3958c2ecf20Sopenharmony_ci			   struct sk_buff *skb)
3968c2ecf20Sopenharmony_ci{
3978c2ecf20Sopenharmony_ci	fq_erase_head(sch, flow, skb);
3988c2ecf20Sopenharmony_ci	skb_mark_not_on_list(skb);
3998c2ecf20Sopenharmony_ci	flow->qlen--;
4008c2ecf20Sopenharmony_ci	qdisc_qstats_backlog_dec(sch, skb);
4018c2ecf20Sopenharmony_ci	sch->q.qlen--;
4028c2ecf20Sopenharmony_ci}
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_cistatic void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
4058c2ecf20Sopenharmony_ci{
4068c2ecf20Sopenharmony_ci	struct rb_node **p, *parent;
4078c2ecf20Sopenharmony_ci	struct sk_buff *head, *aux;
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci	head = flow->head;
4108c2ecf20Sopenharmony_ci	if (!head ||
4118c2ecf20Sopenharmony_ci	    fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) {
4128c2ecf20Sopenharmony_ci		if (!head)
4138c2ecf20Sopenharmony_ci			flow->head = skb;
4148c2ecf20Sopenharmony_ci		else
4158c2ecf20Sopenharmony_ci			flow->tail->next = skb;
4168c2ecf20Sopenharmony_ci		flow->tail = skb;
4178c2ecf20Sopenharmony_ci		skb->next = NULL;
4188c2ecf20Sopenharmony_ci		return;
4198c2ecf20Sopenharmony_ci	}
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_ci	p = &flow->t_root.rb_node;
4228c2ecf20Sopenharmony_ci	parent = NULL;
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_ci	while (*p) {
4258c2ecf20Sopenharmony_ci		parent = *p;
4268c2ecf20Sopenharmony_ci		aux = rb_to_skb(parent);
4278c2ecf20Sopenharmony_ci		if (fq_skb_cb(skb)->time_to_send >= fq_skb_cb(aux)->time_to_send)
4288c2ecf20Sopenharmony_ci			p = &parent->rb_right;
4298c2ecf20Sopenharmony_ci		else
4308c2ecf20Sopenharmony_ci			p = &parent->rb_left;
4318c2ecf20Sopenharmony_ci	}
4328c2ecf20Sopenharmony_ci	rb_link_node(&skb->rbnode, parent, p);
4338c2ecf20Sopenharmony_ci	rb_insert_color(&skb->rbnode, &flow->t_root);
4348c2ecf20Sopenharmony_ci}
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_cistatic bool fq_packet_beyond_horizon(const struct sk_buff *skb,
4378c2ecf20Sopenharmony_ci				    const struct fq_sched_data *q)
4388c2ecf20Sopenharmony_ci{
4398c2ecf20Sopenharmony_ci	return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
4408c2ecf20Sopenharmony_ci}
4418c2ecf20Sopenharmony_ci
4428c2ecf20Sopenharmony_cistatic int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
4438c2ecf20Sopenharmony_ci		      struct sk_buff **to_free)
4448c2ecf20Sopenharmony_ci{
4458c2ecf20Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
4468c2ecf20Sopenharmony_ci	struct fq_flow *f;
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	if (unlikely(sch->q.qlen >= sch->limit))
4498c2ecf20Sopenharmony_ci		return qdisc_drop(skb, sch, to_free);
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	if (!skb->tstamp) {
4528c2ecf20Sopenharmony_ci		fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
4538c2ecf20Sopenharmony_ci	} else {
4548c2ecf20Sopenharmony_ci		/* Check if packet timestamp is too far in the future.
4558c2ecf20Sopenharmony_ci		 * Try first if our cached value, to avoid ktime_get_ns()
4568c2ecf20Sopenharmony_ci		 * cost in most cases.
4578c2ecf20Sopenharmony_ci		 */
4588c2ecf20Sopenharmony_ci		if (fq_packet_beyond_horizon(skb, q)) {
4598c2ecf20Sopenharmony_ci			/* Refresh our cache and check another time */
4608c2ecf20Sopenharmony_ci			q->ktime_cache = ktime_get_ns();
4618c2ecf20Sopenharmony_ci			if (fq_packet_beyond_horizon(skb, q)) {
4628c2ecf20Sopenharmony_ci				if (q->horizon_drop) {
4638c2ecf20Sopenharmony_ci					q->stat_horizon_drops++;
4648c2ecf20Sopenharmony_ci					return qdisc_drop(skb, sch, to_free);
4658c2ecf20Sopenharmony_ci				}
4668c2ecf20Sopenharmony_ci				q->stat_horizon_caps++;
4678c2ecf20Sopenharmony_ci				skb->tstamp = q->ktime_cache + q->horizon;
4688c2ecf20Sopenharmony_ci			}
4698c2ecf20Sopenharmony_ci		}
4708c2ecf20Sopenharmony_ci		fq_skb_cb(skb)->time_to_send = skb->tstamp;
4718c2ecf20Sopenharmony_ci	}
4728c2ecf20Sopenharmony_ci
4738c2ecf20Sopenharmony_ci	f = fq_classify(skb, q);
4748c2ecf20Sopenharmony_ci	if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
4758c2ecf20Sopenharmony_ci		q->stat_flows_plimit++;
4768c2ecf20Sopenharmony_ci		return qdisc_drop(skb, sch, to_free);
4778c2ecf20Sopenharmony_ci	}
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_ci	f->qlen++;
4808c2ecf20Sopenharmony_ci	qdisc_qstats_backlog_inc(sch, skb);
4818c2ecf20Sopenharmony_ci	if (fq_flow_is_detached(f)) {
4828c2ecf20Sopenharmony_ci		fq_flow_add_tail(&q->new_flows, f);
4838c2ecf20Sopenharmony_ci		if (time_after(jiffies, f->age + q->flow_refill_delay))
4848c2ecf20Sopenharmony_ci			f->credit = max_t(u32, f->credit, q->quantum);
4858c2ecf20Sopenharmony_ci		q->inactive_flows--;
4868c2ecf20Sopenharmony_ci	}
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_ci	/* Note: this overwrites f->age */
4898c2ecf20Sopenharmony_ci	flow_queue_add(f, skb);
4908c2ecf20Sopenharmony_ci
4918c2ecf20Sopenharmony_ci	if (unlikely(f == &q->internal)) {
4928c2ecf20Sopenharmony_ci		q->stat_internal_packets++;
4938c2ecf20Sopenharmony_ci	}
4948c2ecf20Sopenharmony_ci	sch->q.qlen++;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	return NET_XMIT_SUCCESS;
4978c2ecf20Sopenharmony_ci}
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_cistatic void fq_check_throttled(struct fq_sched_data *q, u64 now)
5008c2ecf20Sopenharmony_ci{
5018c2ecf20Sopenharmony_ci	unsigned long sample;
5028c2ecf20Sopenharmony_ci	struct rb_node *p;
5038c2ecf20Sopenharmony_ci
5048c2ecf20Sopenharmony_ci	if (q->time_next_delayed_flow > now)
5058c2ecf20Sopenharmony_ci		return;
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci	/* Update unthrottle latency EWMA.
5088c2ecf20Sopenharmony_ci	 * This is cheap and can help diagnosing timer/latency problems.
5098c2ecf20Sopenharmony_ci	 */
5108c2ecf20Sopenharmony_ci	sample = (unsigned long)(now - q->time_next_delayed_flow);
5118c2ecf20Sopenharmony_ci	q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
5128c2ecf20Sopenharmony_ci	q->unthrottle_latency_ns += sample >> 3;
5138c2ecf20Sopenharmony_ci
5148c2ecf20Sopenharmony_ci	q->time_next_delayed_flow = ~0ULL;
5158c2ecf20Sopenharmony_ci	while ((p = rb_first(&q->delayed)) != NULL) {
5168c2ecf20Sopenharmony_ci		struct fq_flow *f = rb_entry(p, struct fq_flow, rate_node);
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_ci		if (f->time_next_packet > now) {
5198c2ecf20Sopenharmony_ci			q->time_next_delayed_flow = f->time_next_packet;
5208c2ecf20Sopenharmony_ci			break;
5218c2ecf20Sopenharmony_ci		}
5228c2ecf20Sopenharmony_ci		fq_flow_unset_throttled(q, f);
5238c2ecf20Sopenharmony_ci	}
5248c2ecf20Sopenharmony_ci}
5258c2ecf20Sopenharmony_ci
5268c2ecf20Sopenharmony_cistatic struct sk_buff *fq_dequeue(struct Qdisc *sch)
5278c2ecf20Sopenharmony_ci{
5288c2ecf20Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
5298c2ecf20Sopenharmony_ci	struct fq_flow_head *head;
5308c2ecf20Sopenharmony_ci	struct sk_buff *skb;
5318c2ecf20Sopenharmony_ci	struct fq_flow *f;
5328c2ecf20Sopenharmony_ci	unsigned long rate;
5338c2ecf20Sopenharmony_ci	u32 plen;
5348c2ecf20Sopenharmony_ci	u64 now;
5358c2ecf20Sopenharmony_ci
5368c2ecf20Sopenharmony_ci	if (!sch->q.qlen)
5378c2ecf20Sopenharmony_ci		return NULL;
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci	skb = fq_peek(&q->internal);
5408c2ecf20Sopenharmony_ci	if (unlikely(skb)) {
5418c2ecf20Sopenharmony_ci		fq_dequeue_skb(sch, &q->internal, skb);
5428c2ecf20Sopenharmony_ci		goto out;
5438c2ecf20Sopenharmony_ci	}
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci	q->ktime_cache = now = ktime_get_ns();
5468c2ecf20Sopenharmony_ci	fq_check_throttled(q, now);
5478c2ecf20Sopenharmony_cibegin:
5488c2ecf20Sopenharmony_ci	head = &q->new_flows;
5498c2ecf20Sopenharmony_ci	if (!head->first) {
5508c2ecf20Sopenharmony_ci		head = &q->old_flows;
5518c2ecf20Sopenharmony_ci		if (!head->first) {
5528c2ecf20Sopenharmony_ci			if (q->time_next_delayed_flow != ~0ULL)
5538c2ecf20Sopenharmony_ci				qdisc_watchdog_schedule_range_ns(&q->watchdog,
5548c2ecf20Sopenharmony_ci							q->time_next_delayed_flow,
5558c2ecf20Sopenharmony_ci							q->timer_slack);
5568c2ecf20Sopenharmony_ci			return NULL;
5578c2ecf20Sopenharmony_ci		}
5588c2ecf20Sopenharmony_ci	}
5598c2ecf20Sopenharmony_ci	f = head->first;
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ci	if (f->credit <= 0) {
5628c2ecf20Sopenharmony_ci		f->credit += q->quantum;
5638c2ecf20Sopenharmony_ci		head->first = f->next;
5648c2ecf20Sopenharmony_ci		fq_flow_add_tail(&q->old_flows, f);
5658c2ecf20Sopenharmony_ci		goto begin;
5668c2ecf20Sopenharmony_ci	}
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci	skb = fq_peek(f);
5698c2ecf20Sopenharmony_ci	if (skb) {
5708c2ecf20Sopenharmony_ci		u64 time_next_packet = max_t(u64, fq_skb_cb(skb)->time_to_send,
5718c2ecf20Sopenharmony_ci					     f->time_next_packet);
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_ci		if (now < time_next_packet) {
5748c2ecf20Sopenharmony_ci			head->first = f->next;
5758c2ecf20Sopenharmony_ci			f->time_next_packet = time_next_packet;
5768c2ecf20Sopenharmony_ci			fq_flow_set_throttled(q, f);
5778c2ecf20Sopenharmony_ci			goto begin;
5788c2ecf20Sopenharmony_ci		}
5798c2ecf20Sopenharmony_ci		prefetch(&skb->end);
5808c2ecf20Sopenharmony_ci		if ((s64)(now - time_next_packet - q->ce_threshold) > 0) {
5818c2ecf20Sopenharmony_ci			INET_ECN_set_ce(skb);
5828c2ecf20Sopenharmony_ci			q->stat_ce_mark++;
5838c2ecf20Sopenharmony_ci		}
5848c2ecf20Sopenharmony_ci		fq_dequeue_skb(sch, f, skb);
5858c2ecf20Sopenharmony_ci	} else {
5868c2ecf20Sopenharmony_ci		head->first = f->next;
5878c2ecf20Sopenharmony_ci		/* force a pass through old_flows to prevent starvation */
5888c2ecf20Sopenharmony_ci		if ((head == &q->new_flows) && q->old_flows.first) {
5898c2ecf20Sopenharmony_ci			fq_flow_add_tail(&q->old_flows, f);
5908c2ecf20Sopenharmony_ci		} else {
5918c2ecf20Sopenharmony_ci			fq_flow_set_detached(f);
5928c2ecf20Sopenharmony_ci			q->inactive_flows++;
5938c2ecf20Sopenharmony_ci		}
5948c2ecf20Sopenharmony_ci		goto begin;
5958c2ecf20Sopenharmony_ci	}
5968c2ecf20Sopenharmony_ci	plen = qdisc_pkt_len(skb);
5978c2ecf20Sopenharmony_ci	f->credit -= plen;
5988c2ecf20Sopenharmony_ci
5998c2ecf20Sopenharmony_ci	if (!q->rate_enable)
6008c2ecf20Sopenharmony_ci		goto out;
6018c2ecf20Sopenharmony_ci
6028c2ecf20Sopenharmony_ci	rate = q->flow_max_rate;
6038c2ecf20Sopenharmony_ci
6048c2ecf20Sopenharmony_ci	/* If EDT time was provided for this skb, we need to
6058c2ecf20Sopenharmony_ci	 * update f->time_next_packet only if this qdisc enforces
6068c2ecf20Sopenharmony_ci	 * a flow max rate.
6078c2ecf20Sopenharmony_ci	 */
6088c2ecf20Sopenharmony_ci	if (!skb->tstamp) {
6098c2ecf20Sopenharmony_ci		if (skb->sk)
6108c2ecf20Sopenharmony_ci			rate = min(skb->sk->sk_pacing_rate, rate);
6118c2ecf20Sopenharmony_ci
6128c2ecf20Sopenharmony_ci		if (rate <= q->low_rate_threshold) {
6138c2ecf20Sopenharmony_ci			f->credit = 0;
6148c2ecf20Sopenharmony_ci		} else {
6158c2ecf20Sopenharmony_ci			plen = max(plen, q->quantum);
6168c2ecf20Sopenharmony_ci			if (f->credit > 0)
6178c2ecf20Sopenharmony_ci				goto out;
6188c2ecf20Sopenharmony_ci		}
6198c2ecf20Sopenharmony_ci	}
6208c2ecf20Sopenharmony_ci	if (rate != ~0UL) {
6218c2ecf20Sopenharmony_ci		u64 len = (u64)plen * NSEC_PER_SEC;
6228c2ecf20Sopenharmony_ci
6238c2ecf20Sopenharmony_ci		if (likely(rate))
6248c2ecf20Sopenharmony_ci			len = div64_ul(len, rate);
6258c2ecf20Sopenharmony_ci		/* Since socket rate can change later,
6268c2ecf20Sopenharmony_ci		 * clamp the delay to 1 second.
6278c2ecf20Sopenharmony_ci		 * Really, providers of too big packets should be fixed !
6288c2ecf20Sopenharmony_ci		 */
6298c2ecf20Sopenharmony_ci		if (unlikely(len > NSEC_PER_SEC)) {
6308c2ecf20Sopenharmony_ci			len = NSEC_PER_SEC;
6318c2ecf20Sopenharmony_ci			q->stat_pkts_too_long++;
6328c2ecf20Sopenharmony_ci		}
6338c2ecf20Sopenharmony_ci		/* Account for schedule/timers drifts.
6348c2ecf20Sopenharmony_ci		 * f->time_next_packet was set when prior packet was sent,
6358c2ecf20Sopenharmony_ci		 * and current time (@now) can be too late by tens of us.
6368c2ecf20Sopenharmony_ci		 */
6378c2ecf20Sopenharmony_ci		if (f->time_next_packet)
6388c2ecf20Sopenharmony_ci			len -= min(len/2, now - f->time_next_packet);
6398c2ecf20Sopenharmony_ci		f->time_next_packet = now + len;
6408c2ecf20Sopenharmony_ci	}
6418c2ecf20Sopenharmony_ciout:
6428c2ecf20Sopenharmony_ci	qdisc_bstats_update(sch, skb);
6438c2ecf20Sopenharmony_ci	return skb;
6448c2ecf20Sopenharmony_ci}
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_cistatic void fq_flow_purge(struct fq_flow *flow)
6478c2ecf20Sopenharmony_ci{
6488c2ecf20Sopenharmony_ci	struct rb_node *p = rb_first(&flow->t_root);
6498c2ecf20Sopenharmony_ci
6508c2ecf20Sopenharmony_ci	while (p) {
6518c2ecf20Sopenharmony_ci		struct sk_buff *skb = rb_to_skb(p);
6528c2ecf20Sopenharmony_ci
6538c2ecf20Sopenharmony_ci		p = rb_next(p);
6548c2ecf20Sopenharmony_ci		rb_erase(&skb->rbnode, &flow->t_root);
6558c2ecf20Sopenharmony_ci		rtnl_kfree_skbs(skb, skb);
6568c2ecf20Sopenharmony_ci	}
6578c2ecf20Sopenharmony_ci	rtnl_kfree_skbs(flow->head, flow->tail);
6588c2ecf20Sopenharmony_ci	flow->head = NULL;
6598c2ecf20Sopenharmony_ci	flow->qlen = 0;
6608c2ecf20Sopenharmony_ci}
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_cistatic void fq_reset(struct Qdisc *sch)
6638c2ecf20Sopenharmony_ci{
6648c2ecf20Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
6658c2ecf20Sopenharmony_ci	struct rb_root *root;
6668c2ecf20Sopenharmony_ci	struct rb_node *p;
6678c2ecf20Sopenharmony_ci	struct fq_flow *f;
6688c2ecf20Sopenharmony_ci	unsigned int idx;
6698c2ecf20Sopenharmony_ci
6708c2ecf20Sopenharmony_ci	sch->q.qlen = 0;
6718c2ecf20Sopenharmony_ci	sch->qstats.backlog = 0;
6728c2ecf20Sopenharmony_ci
6738c2ecf20Sopenharmony_ci	fq_flow_purge(&q->internal);
6748c2ecf20Sopenharmony_ci
6758c2ecf20Sopenharmony_ci	if (!q->fq_root)
6768c2ecf20Sopenharmony_ci		return;
6778c2ecf20Sopenharmony_ci
6788c2ecf20Sopenharmony_ci	for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
6798c2ecf20Sopenharmony_ci		root = &q->fq_root[idx];
6808c2ecf20Sopenharmony_ci		while ((p = rb_first(root)) != NULL) {
6818c2ecf20Sopenharmony_ci			f = rb_entry(p, struct fq_flow, fq_node);
6828c2ecf20Sopenharmony_ci			rb_erase(p, root);
6838c2ecf20Sopenharmony_ci
6848c2ecf20Sopenharmony_ci			fq_flow_purge(f);
6858c2ecf20Sopenharmony_ci
6868c2ecf20Sopenharmony_ci			kmem_cache_free(fq_flow_cachep, f);
6878c2ecf20Sopenharmony_ci		}
6888c2ecf20Sopenharmony_ci	}
6898c2ecf20Sopenharmony_ci	q->new_flows.first	= NULL;
6908c2ecf20Sopenharmony_ci	q->old_flows.first	= NULL;
6918c2ecf20Sopenharmony_ci	q->delayed		= RB_ROOT;
6928c2ecf20Sopenharmony_ci	q->flows		= 0;
6938c2ecf20Sopenharmony_ci	q->inactive_flows	= 0;
6948c2ecf20Sopenharmony_ci	q->throttled_flows	= 0;
6958c2ecf20Sopenharmony_ci}
6968c2ecf20Sopenharmony_ci
6978c2ecf20Sopenharmony_cistatic void fq_rehash(struct fq_sched_data *q,
6988c2ecf20Sopenharmony_ci		      struct rb_root *old_array, u32 old_log,
6998c2ecf20Sopenharmony_ci		      struct rb_root *new_array, u32 new_log)
7008c2ecf20Sopenharmony_ci{
7018c2ecf20Sopenharmony_ci	struct rb_node *op, **np, *parent;
7028c2ecf20Sopenharmony_ci	struct rb_root *oroot, *nroot;
7038c2ecf20Sopenharmony_ci	struct fq_flow *of, *nf;
7048c2ecf20Sopenharmony_ci	int fcnt = 0;
7058c2ecf20Sopenharmony_ci	u32 idx;
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_ci	for (idx = 0; idx < (1U << old_log); idx++) {
7088c2ecf20Sopenharmony_ci		oroot = &old_array[idx];
7098c2ecf20Sopenharmony_ci		while ((op = rb_first(oroot)) != NULL) {
7108c2ecf20Sopenharmony_ci			rb_erase(op, oroot);
7118c2ecf20Sopenharmony_ci			of = rb_entry(op, struct fq_flow, fq_node);
7128c2ecf20Sopenharmony_ci			if (fq_gc_candidate(of)) {
7138c2ecf20Sopenharmony_ci				fcnt++;
7148c2ecf20Sopenharmony_ci				kmem_cache_free(fq_flow_cachep, of);
7158c2ecf20Sopenharmony_ci				continue;
7168c2ecf20Sopenharmony_ci			}
7178c2ecf20Sopenharmony_ci			nroot = &new_array[hash_ptr(of->sk, new_log)];
7188c2ecf20Sopenharmony_ci
7198c2ecf20Sopenharmony_ci			np = &nroot->rb_node;
7208c2ecf20Sopenharmony_ci			parent = NULL;
7218c2ecf20Sopenharmony_ci			while (*np) {
7228c2ecf20Sopenharmony_ci				parent = *np;
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci				nf = rb_entry(parent, struct fq_flow, fq_node);
7258c2ecf20Sopenharmony_ci				BUG_ON(nf->sk == of->sk);
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ci				if (nf->sk > of->sk)
7288c2ecf20Sopenharmony_ci					np = &parent->rb_right;
7298c2ecf20Sopenharmony_ci				else
7308c2ecf20Sopenharmony_ci					np = &parent->rb_left;
7318c2ecf20Sopenharmony_ci			}
7328c2ecf20Sopenharmony_ci
7338c2ecf20Sopenharmony_ci			rb_link_node(&of->fq_node, parent, np);
7348c2ecf20Sopenharmony_ci			rb_insert_color(&of->fq_node, nroot);
7358c2ecf20Sopenharmony_ci		}
7368c2ecf20Sopenharmony_ci	}
7378c2ecf20Sopenharmony_ci	q->flows -= fcnt;
7388c2ecf20Sopenharmony_ci	q->inactive_flows -= fcnt;
7398c2ecf20Sopenharmony_ci	q->stat_gc_flows += fcnt;
7408c2ecf20Sopenharmony_ci}
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_cistatic void fq_free(void *addr)
7438c2ecf20Sopenharmony_ci{
7448c2ecf20Sopenharmony_ci	kvfree(addr);
7458c2ecf20Sopenharmony_ci}
7468c2ecf20Sopenharmony_ci
7478c2ecf20Sopenharmony_cistatic int fq_resize(struct Qdisc *sch, u32 log)
7488c2ecf20Sopenharmony_ci{
7498c2ecf20Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
7508c2ecf20Sopenharmony_ci	struct rb_root *array;
7518c2ecf20Sopenharmony_ci	void *old_fq_root;
7528c2ecf20Sopenharmony_ci	u32 idx;
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ci	if (q->fq_root && log == q->fq_trees_log)
7558c2ecf20Sopenharmony_ci		return 0;
7568c2ecf20Sopenharmony_ci
7578c2ecf20Sopenharmony_ci	/* If XPS was setup, we can allocate memory on right NUMA node */
7588c2ecf20Sopenharmony_ci	array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_RETRY_MAYFAIL,
7598c2ecf20Sopenharmony_ci			      netdev_queue_numa_node_read(sch->dev_queue));
7608c2ecf20Sopenharmony_ci	if (!array)
7618c2ecf20Sopenharmony_ci		return -ENOMEM;
7628c2ecf20Sopenharmony_ci
7638c2ecf20Sopenharmony_ci	for (idx = 0; idx < (1U << log); idx++)
7648c2ecf20Sopenharmony_ci		array[idx] = RB_ROOT;
7658c2ecf20Sopenharmony_ci
7668c2ecf20Sopenharmony_ci	sch_tree_lock(sch);
7678c2ecf20Sopenharmony_ci
7688c2ecf20Sopenharmony_ci	old_fq_root = q->fq_root;
7698c2ecf20Sopenharmony_ci	if (old_fq_root)
7708c2ecf20Sopenharmony_ci		fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	q->fq_root = array;
7738c2ecf20Sopenharmony_ci	q->fq_trees_log = log;
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_ci	sch_tree_unlock(sch);
7768c2ecf20Sopenharmony_ci
7778c2ecf20Sopenharmony_ci	fq_free(old_fq_root);
7788c2ecf20Sopenharmony_ci
7798c2ecf20Sopenharmony_ci	return 0;
7808c2ecf20Sopenharmony_ci}
7818c2ecf20Sopenharmony_ci
7828c2ecf20Sopenharmony_cistatic struct netlink_range_validation iq_range = {
7838c2ecf20Sopenharmony_ci	.max = INT_MAX,
7848c2ecf20Sopenharmony_ci};
7858c2ecf20Sopenharmony_ci
7868c2ecf20Sopenharmony_cistatic const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
7878c2ecf20Sopenharmony_ci	[TCA_FQ_UNSPEC]			= { .strict_start_type = TCA_FQ_TIMER_SLACK },
7888c2ecf20Sopenharmony_ci
7898c2ecf20Sopenharmony_ci	[TCA_FQ_PLIMIT]			= { .type = NLA_U32 },
7908c2ecf20Sopenharmony_ci	[TCA_FQ_FLOW_PLIMIT]		= { .type = NLA_U32 },
7918c2ecf20Sopenharmony_ci	[TCA_FQ_QUANTUM]		= { .type = NLA_U32 },
7928c2ecf20Sopenharmony_ci	[TCA_FQ_INITIAL_QUANTUM]	= NLA_POLICY_FULL_RANGE(NLA_U32, &iq_range),
7938c2ecf20Sopenharmony_ci	[TCA_FQ_RATE_ENABLE]		= { .type = NLA_U32 },
7948c2ecf20Sopenharmony_ci	[TCA_FQ_FLOW_DEFAULT_RATE]	= { .type = NLA_U32 },
7958c2ecf20Sopenharmony_ci	[TCA_FQ_FLOW_MAX_RATE]		= { .type = NLA_U32 },
7968c2ecf20Sopenharmony_ci	[TCA_FQ_BUCKETS_LOG]		= { .type = NLA_U32 },
7978c2ecf20Sopenharmony_ci	[TCA_FQ_FLOW_REFILL_DELAY]	= { .type = NLA_U32 },
7988c2ecf20Sopenharmony_ci	[TCA_FQ_ORPHAN_MASK]		= { .type = NLA_U32 },
7998c2ecf20Sopenharmony_ci	[TCA_FQ_LOW_RATE_THRESHOLD]	= { .type = NLA_U32 },
8008c2ecf20Sopenharmony_ci	[TCA_FQ_CE_THRESHOLD]		= { .type = NLA_U32 },
8018c2ecf20Sopenharmony_ci	[TCA_FQ_TIMER_SLACK]		= { .type = NLA_U32 },
8028c2ecf20Sopenharmony_ci	[TCA_FQ_HORIZON]		= { .type = NLA_U32 },
8038c2ecf20Sopenharmony_ci	[TCA_FQ_HORIZON_DROP]		= { .type = NLA_U8 },
8048c2ecf20Sopenharmony_ci};
8058c2ecf20Sopenharmony_ci
8068c2ecf20Sopenharmony_cistatic int fq_change(struct Qdisc *sch, struct nlattr *opt,
8078c2ecf20Sopenharmony_ci		     struct netlink_ext_ack *extack)
8088c2ecf20Sopenharmony_ci{
8098c2ecf20Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
8108c2ecf20Sopenharmony_ci	struct nlattr *tb[TCA_FQ_MAX + 1];
8118c2ecf20Sopenharmony_ci	int err, drop_count = 0;
8128c2ecf20Sopenharmony_ci	unsigned drop_len = 0;
8138c2ecf20Sopenharmony_ci	u32 fq_log;
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci	if (!opt)
8168c2ecf20Sopenharmony_ci		return -EINVAL;
8178c2ecf20Sopenharmony_ci
8188c2ecf20Sopenharmony_ci	err = nla_parse_nested_deprecated(tb, TCA_FQ_MAX, opt, fq_policy,
8198c2ecf20Sopenharmony_ci					  NULL);
8208c2ecf20Sopenharmony_ci	if (err < 0)
8218c2ecf20Sopenharmony_ci		return err;
8228c2ecf20Sopenharmony_ci
8238c2ecf20Sopenharmony_ci	sch_tree_lock(sch);
8248c2ecf20Sopenharmony_ci
8258c2ecf20Sopenharmony_ci	fq_log = q->fq_trees_log;
8268c2ecf20Sopenharmony_ci
8278c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_BUCKETS_LOG]) {
8288c2ecf20Sopenharmony_ci		u32 nval = nla_get_u32(tb[TCA_FQ_BUCKETS_LOG]);
8298c2ecf20Sopenharmony_ci
8308c2ecf20Sopenharmony_ci		if (nval >= 1 && nval <= ilog2(256*1024))
8318c2ecf20Sopenharmony_ci			fq_log = nval;
8328c2ecf20Sopenharmony_ci		else
8338c2ecf20Sopenharmony_ci			err = -EINVAL;
8348c2ecf20Sopenharmony_ci	}
8358c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_PLIMIT])
8368c2ecf20Sopenharmony_ci		sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]);
8378c2ecf20Sopenharmony_ci
8388c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_FLOW_PLIMIT])
8398c2ecf20Sopenharmony_ci		q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_QUANTUM]) {
8428c2ecf20Sopenharmony_ci		u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
8438c2ecf20Sopenharmony_ci
8448c2ecf20Sopenharmony_ci		if (quantum > 0 && quantum <= (1 << 20)) {
8458c2ecf20Sopenharmony_ci			q->quantum = quantum;
8468c2ecf20Sopenharmony_ci		} else {
8478c2ecf20Sopenharmony_ci			NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
8488c2ecf20Sopenharmony_ci			err = -EINVAL;
8498c2ecf20Sopenharmony_ci		}
8508c2ecf20Sopenharmony_ci	}
8518c2ecf20Sopenharmony_ci
8528c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_INITIAL_QUANTUM])
8538c2ecf20Sopenharmony_ci		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
8548c2ecf20Sopenharmony_ci
8558c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
8568c2ecf20Sopenharmony_ci		pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
8578c2ecf20Sopenharmony_ci				    nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]));
8588c2ecf20Sopenharmony_ci
8598c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_FLOW_MAX_RATE]) {
8608c2ecf20Sopenharmony_ci		u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci		q->flow_max_rate = (rate == ~0U) ? ~0UL : rate;
8638c2ecf20Sopenharmony_ci	}
8648c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
8658c2ecf20Sopenharmony_ci		q->low_rate_threshold =
8668c2ecf20Sopenharmony_ci			nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
8678c2ecf20Sopenharmony_ci
8688c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_RATE_ENABLE]) {
8698c2ecf20Sopenharmony_ci		u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
8708c2ecf20Sopenharmony_ci
8718c2ecf20Sopenharmony_ci		if (enable <= 1)
8728c2ecf20Sopenharmony_ci			q->rate_enable = enable;
8738c2ecf20Sopenharmony_ci		else
8748c2ecf20Sopenharmony_ci			err = -EINVAL;
8758c2ecf20Sopenharmony_ci	}
8768c2ecf20Sopenharmony_ci
8778c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
8788c2ecf20Sopenharmony_ci		u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
8798c2ecf20Sopenharmony_ci
8808c2ecf20Sopenharmony_ci		q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
8818c2ecf20Sopenharmony_ci	}
8828c2ecf20Sopenharmony_ci
8838c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_ORPHAN_MASK])
8848c2ecf20Sopenharmony_ci		q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
8858c2ecf20Sopenharmony_ci
8868c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_CE_THRESHOLD])
8878c2ecf20Sopenharmony_ci		q->ce_threshold = (u64)NSEC_PER_USEC *
8888c2ecf20Sopenharmony_ci				  nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
8898c2ecf20Sopenharmony_ci
8908c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_TIMER_SLACK])
8918c2ecf20Sopenharmony_ci		q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
8928c2ecf20Sopenharmony_ci
8938c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_HORIZON])
8948c2ecf20Sopenharmony_ci		q->horizon = (u64)NSEC_PER_USEC *
8958c2ecf20Sopenharmony_ci				  nla_get_u32(tb[TCA_FQ_HORIZON]);
8968c2ecf20Sopenharmony_ci
8978c2ecf20Sopenharmony_ci	if (tb[TCA_FQ_HORIZON_DROP])
8988c2ecf20Sopenharmony_ci		q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
8998c2ecf20Sopenharmony_ci
9008c2ecf20Sopenharmony_ci	if (!err) {
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci		sch_tree_unlock(sch);
9038c2ecf20Sopenharmony_ci		err = fq_resize(sch, fq_log);
9048c2ecf20Sopenharmony_ci		sch_tree_lock(sch);
9058c2ecf20Sopenharmony_ci	}
9068c2ecf20Sopenharmony_ci	while (sch->q.qlen > sch->limit) {
9078c2ecf20Sopenharmony_ci		struct sk_buff *skb = fq_dequeue(sch);
9088c2ecf20Sopenharmony_ci
9098c2ecf20Sopenharmony_ci		if (!skb)
9108c2ecf20Sopenharmony_ci			break;
9118c2ecf20Sopenharmony_ci		drop_len += qdisc_pkt_len(skb);
9128c2ecf20Sopenharmony_ci		rtnl_kfree_skbs(skb, skb);
9138c2ecf20Sopenharmony_ci		drop_count++;
9148c2ecf20Sopenharmony_ci	}
9158c2ecf20Sopenharmony_ci	qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
9168c2ecf20Sopenharmony_ci
9178c2ecf20Sopenharmony_ci	sch_tree_unlock(sch);
9188c2ecf20Sopenharmony_ci	return err;
9198c2ecf20Sopenharmony_ci}
9208c2ecf20Sopenharmony_ci
9218c2ecf20Sopenharmony_cistatic void fq_destroy(struct Qdisc *sch)
9228c2ecf20Sopenharmony_ci{
9238c2ecf20Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
9248c2ecf20Sopenharmony_ci
9258c2ecf20Sopenharmony_ci	fq_reset(sch);
9268c2ecf20Sopenharmony_ci	fq_free(q->fq_root);
9278c2ecf20Sopenharmony_ci	qdisc_watchdog_cancel(&q->watchdog);
9288c2ecf20Sopenharmony_ci}
9298c2ecf20Sopenharmony_ci
9308c2ecf20Sopenharmony_cistatic int fq_init(struct Qdisc *sch, struct nlattr *opt,
9318c2ecf20Sopenharmony_ci		   struct netlink_ext_ack *extack)
9328c2ecf20Sopenharmony_ci{
9338c2ecf20Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
9348c2ecf20Sopenharmony_ci	int err;
9358c2ecf20Sopenharmony_ci
9368c2ecf20Sopenharmony_ci	sch->limit		= 10000;
9378c2ecf20Sopenharmony_ci	q->flow_plimit		= 100;
9388c2ecf20Sopenharmony_ci	q->quantum		= 2 * psched_mtu(qdisc_dev(sch));
9398c2ecf20Sopenharmony_ci	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
9408c2ecf20Sopenharmony_ci	q->flow_refill_delay	= msecs_to_jiffies(40);
9418c2ecf20Sopenharmony_ci	q->flow_max_rate	= ~0UL;
9428c2ecf20Sopenharmony_ci	q->time_next_delayed_flow = ~0ULL;
9438c2ecf20Sopenharmony_ci	q->rate_enable		= 1;
9448c2ecf20Sopenharmony_ci	q->new_flows.first	= NULL;
9458c2ecf20Sopenharmony_ci	q->old_flows.first	= NULL;
9468c2ecf20Sopenharmony_ci	q->delayed		= RB_ROOT;
9478c2ecf20Sopenharmony_ci	q->fq_root		= NULL;
9488c2ecf20Sopenharmony_ci	q->fq_trees_log		= ilog2(1024);
9498c2ecf20Sopenharmony_ci	q->orphan_mask		= 1024 - 1;
9508c2ecf20Sopenharmony_ci	q->low_rate_threshold	= 550000 / 8;
9518c2ecf20Sopenharmony_ci
9528c2ecf20Sopenharmony_ci	q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
9538c2ecf20Sopenharmony_ci
9548c2ecf20Sopenharmony_ci	q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds */
9558c2ecf20Sopenharmony_ci	q->horizon_drop = 1; /* by default, drop packets beyond horizon */
9568c2ecf20Sopenharmony_ci
9578c2ecf20Sopenharmony_ci	/* Default ce_threshold of 4294 seconds */
9588c2ecf20Sopenharmony_ci	q->ce_threshold		= (u64)NSEC_PER_USEC * ~0U;
9598c2ecf20Sopenharmony_ci
9608c2ecf20Sopenharmony_ci	qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);
9618c2ecf20Sopenharmony_ci
9628c2ecf20Sopenharmony_ci	if (opt)
9638c2ecf20Sopenharmony_ci		err = fq_change(sch, opt, extack);
9648c2ecf20Sopenharmony_ci	else
9658c2ecf20Sopenharmony_ci		err = fq_resize(sch, q->fq_trees_log);
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_ci	return err;
9688c2ecf20Sopenharmony_ci}
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_cistatic int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
9718c2ecf20Sopenharmony_ci{
9728c2ecf20Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
9738c2ecf20Sopenharmony_ci	u64 ce_threshold = q->ce_threshold;
9748c2ecf20Sopenharmony_ci	u64 horizon = q->horizon;
9758c2ecf20Sopenharmony_ci	struct nlattr *opts;
9768c2ecf20Sopenharmony_ci
9778c2ecf20Sopenharmony_ci	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
9788c2ecf20Sopenharmony_ci	if (opts == NULL)
9798c2ecf20Sopenharmony_ci		goto nla_put_failure;
9808c2ecf20Sopenharmony_ci
9818c2ecf20Sopenharmony_ci	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
9828c2ecf20Sopenharmony_ci
9838c2ecf20Sopenharmony_ci	do_div(ce_threshold, NSEC_PER_USEC);
9848c2ecf20Sopenharmony_ci	do_div(horizon, NSEC_PER_USEC);
9858c2ecf20Sopenharmony_ci
9868c2ecf20Sopenharmony_ci	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
9878c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
9888c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
9898c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
9908c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
9918c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE,
9928c2ecf20Sopenharmony_ci			min_t(unsigned long, q->flow_max_rate, ~0U)) ||
9938c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
9948c2ecf20Sopenharmony_ci			jiffies_to_usecs(q->flow_refill_delay)) ||
9958c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
9968c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
9978c2ecf20Sopenharmony_ci			q->low_rate_threshold) ||
9988c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
9998c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
10008c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
10018c2ecf20Sopenharmony_ci	    nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
10028c2ecf20Sopenharmony_ci	    nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
10038c2ecf20Sopenharmony_ci		goto nla_put_failure;
10048c2ecf20Sopenharmony_ci
10058c2ecf20Sopenharmony_ci	return nla_nest_end(skb, opts);
10068c2ecf20Sopenharmony_ci
10078c2ecf20Sopenharmony_cinla_put_failure:
10088c2ecf20Sopenharmony_ci	return -1;
10098c2ecf20Sopenharmony_ci}
10108c2ecf20Sopenharmony_ci
10118c2ecf20Sopenharmony_cistatic int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
10128c2ecf20Sopenharmony_ci{
10138c2ecf20Sopenharmony_ci	struct fq_sched_data *q = qdisc_priv(sch);
10148c2ecf20Sopenharmony_ci	struct tc_fq_qd_stats st;
10158c2ecf20Sopenharmony_ci
10168c2ecf20Sopenharmony_ci	sch_tree_lock(sch);
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci	st.gc_flows		  = q->stat_gc_flows;
10198c2ecf20Sopenharmony_ci	st.highprio_packets	  = q->stat_internal_packets;
10208c2ecf20Sopenharmony_ci	st.tcp_retrans		  = 0;
10218c2ecf20Sopenharmony_ci	st.throttled		  = q->stat_throttled;
10228c2ecf20Sopenharmony_ci	st.flows_plimit		  = q->stat_flows_plimit;
10238c2ecf20Sopenharmony_ci	st.pkts_too_long	  = q->stat_pkts_too_long;
10248c2ecf20Sopenharmony_ci	st.allocation_errors	  = q->stat_allocation_errors;
10258c2ecf20Sopenharmony_ci	st.time_next_delayed_flow = q->time_next_delayed_flow + q->timer_slack -
10268c2ecf20Sopenharmony_ci				    ktime_get_ns();
10278c2ecf20Sopenharmony_ci	st.flows		  = q->flows;
10288c2ecf20Sopenharmony_ci	st.inactive_flows	  = q->inactive_flows;
10298c2ecf20Sopenharmony_ci	st.throttled_flows	  = q->throttled_flows;
10308c2ecf20Sopenharmony_ci	st.unthrottle_latency_ns  = min_t(unsigned long,
10318c2ecf20Sopenharmony_ci					  q->unthrottle_latency_ns, ~0U);
10328c2ecf20Sopenharmony_ci	st.ce_mark		  = q->stat_ce_mark;
10338c2ecf20Sopenharmony_ci	st.horizon_drops	  = q->stat_horizon_drops;
10348c2ecf20Sopenharmony_ci	st.horizon_caps		  = q->stat_horizon_caps;
10358c2ecf20Sopenharmony_ci	sch_tree_unlock(sch);
10368c2ecf20Sopenharmony_ci
10378c2ecf20Sopenharmony_ci	return gnet_stats_copy_app(d, &st, sizeof(st));
10388c2ecf20Sopenharmony_ci}
10398c2ecf20Sopenharmony_ci
10408c2ecf20Sopenharmony_cistatic struct Qdisc_ops fq_qdisc_ops __read_mostly = {
10418c2ecf20Sopenharmony_ci	.id		=	"fq",
10428c2ecf20Sopenharmony_ci	.priv_size	=	sizeof(struct fq_sched_data),
10438c2ecf20Sopenharmony_ci
10448c2ecf20Sopenharmony_ci	.enqueue	=	fq_enqueue,
10458c2ecf20Sopenharmony_ci	.dequeue	=	fq_dequeue,
10468c2ecf20Sopenharmony_ci	.peek		=	qdisc_peek_dequeued,
10478c2ecf20Sopenharmony_ci	.init		=	fq_init,
10488c2ecf20Sopenharmony_ci	.reset		=	fq_reset,
10498c2ecf20Sopenharmony_ci	.destroy	=	fq_destroy,
10508c2ecf20Sopenharmony_ci	.change		=	fq_change,
10518c2ecf20Sopenharmony_ci	.dump		=	fq_dump,
10528c2ecf20Sopenharmony_ci	.dump_stats	=	fq_dump_stats,
10538c2ecf20Sopenharmony_ci	.owner		=	THIS_MODULE,
10548c2ecf20Sopenharmony_ci};
10558c2ecf20Sopenharmony_ci
10568c2ecf20Sopenharmony_cistatic int __init fq_module_init(void)
10578c2ecf20Sopenharmony_ci{
10588c2ecf20Sopenharmony_ci	int ret;
10598c2ecf20Sopenharmony_ci
10608c2ecf20Sopenharmony_ci	fq_flow_cachep = kmem_cache_create("fq_flow_cache",
10618c2ecf20Sopenharmony_ci					   sizeof(struct fq_flow),
10628c2ecf20Sopenharmony_ci					   0, 0, NULL);
10638c2ecf20Sopenharmony_ci	if (!fq_flow_cachep)
10648c2ecf20Sopenharmony_ci		return -ENOMEM;
10658c2ecf20Sopenharmony_ci
10668c2ecf20Sopenharmony_ci	ret = register_qdisc(&fq_qdisc_ops);
10678c2ecf20Sopenharmony_ci	if (ret)
10688c2ecf20Sopenharmony_ci		kmem_cache_destroy(fq_flow_cachep);
10698c2ecf20Sopenharmony_ci	return ret;
10708c2ecf20Sopenharmony_ci}
10718c2ecf20Sopenharmony_ci
10728c2ecf20Sopenharmony_cistatic void __exit fq_module_exit(void)
10738c2ecf20Sopenharmony_ci{
10748c2ecf20Sopenharmony_ci	unregister_qdisc(&fq_qdisc_ops);
10758c2ecf20Sopenharmony_ci	kmem_cache_destroy(fq_flow_cachep);
10768c2ecf20Sopenharmony_ci}
10778c2ecf20Sopenharmony_ci
10788c2ecf20Sopenharmony_cimodule_init(fq_module_init)
10798c2ecf20Sopenharmony_cimodule_exit(fq_module_exit)
10808c2ecf20Sopenharmony_ciMODULE_AUTHOR("Eric Dumazet");
10818c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
10828c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Fair Queue Packet Scheduler");
1083