1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __NET_SCHED_GENERIC_H
3#define __NET_SCHED_GENERIC_H
4
5#include <linux/netdevice.h>
6#include <linux/types.h>
7#include <linux/rcupdate.h>
8#include <linux/pkt_sched.h>
9#include <linux/pkt_cls.h>
10#include <linux/percpu.h>
11#include <linux/dynamic_queue_limits.h>
12#include <linux/list.h>
13#include <linux/refcount.h>
14#include <linux/workqueue.h>
15#include <linux/mutex.h>
16#include <linux/rwsem.h>
17#include <linux/atomic.h>
18#include <linux/hashtable.h>
19#include <net/gen_stats.h>
20#include <net/rtnetlink.h>
21#include <net/flow_offload.h>
22
/* Forward declarations: these are incomplete types at this point, so only
 * pointers to them can be formed until a full definition is seen.
 */
struct Qdisc_ops;
struct qdisc_walker;
struct tcf_walker;
struct module;
struct bpf_flow_keys;
28
/* A rate specification together with a 256-entry u32 lookup array. */
struct qdisc_rate_table {
	struct tc_ratespec rate;
	u32		data[256];
	/* Link to another table; NOTE(review): presumably a global list of
	 * shared tables - the list head is not visible in this header.
	 */
	struct qdisc_rate_table *next;
	/* Plain int counter; whatever serialises it is not shown here. */
	int		refcnt;
};
35
/* Bit numbers used with the bitops on Qdisc::state (see qdisc_run_begin()
 * and the QDISC_STATE_* masks below).
 */
enum qdisc_state_t {
	__QDISC_STATE_SCHED,
	__QDISC_STATE_DEACTIVATED,
	__QDISC_STATE_MISSED,
	__QDISC_STATE_DRAINING,
};
42
/* Bit numbers used with the bitops on Qdisc::state2. */
enum qdisc_state2_t {
	/* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
	 * Use qdisc_run_begin/end() or qdisc_is_running() instead.
	 */
	__QDISC_STATE2_RUNNING,
};
49
/* Mask forms of the corresponding enum qdisc_state_t bit numbers. */
#define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
#define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)

/* Either bit set means nolock_qdisc_is_empty() reports "not empty". */
#define QDISC_STATE_NON_EMPTY	(QDISC_STATE_MISSED | \
					QDISC_STATE_DRAINING)
55
/* Size table: options plus a trailing variable-length array of u16 entries.
 * Carries an rcu_head and is referenced from Qdisc::stab, which is an __rcu
 * pointer.
 */
struct qdisc_size_table {
	struct rcu_head		rcu;
	struct list_head	list;
	struct tc_sizespec	szopts;
	int			refcnt;
	/* Flexible array member; its length is not recorded in a field
	 * visible here (presumably derived from szopts - confirm).
	 */
	u16			data[];
};
63
/* Similar to sk_buff_head, but the skb->prev pointer is undefined: the
 * helpers in this header link packets through skb->next only and track
 * both ends of the list explicitly.
 */
struct qdisc_skb_head {
	struct sk_buff	*head;
	struct sk_buff	*tail;
	__u32		qlen;
	/* Returned by qdisc_lock(). */
	spinlock_t	lock;
};
71
/* One queueing-discipline instance.
 *
 * Field order matters: frequently written members are grouped after the
 * first ____cacheline_aligned_in_smp marker, and the per-qdisc private area
 * is the trailing flexible array.
 */
struct Qdisc {
	int 			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
					   struct sk_buff **to_free);
	struct sk_buff *	(*dequeue)(struct Qdisc *sch);
	/* Bitmask of the TCQ_F_* values defined right below. */
	unsigned int		flags;
#define TCQ_F_BUILTIN		1
#define TCQ_F_INGRESS		2
#define TCQ_F_CAN_BYPASS	4
#define TCQ_F_MQROOT		8
#define TCQ_F_ONETXQUEUE	0x10 /* dequeue_skb() can assume all skbs are for
				      * q->dev_queue : It can test
				      * netif_xmit_frozen_or_stopped() before
				      * dequeueing next packet.
				      * It's true for MQ/MQPRIO slaves, or non
				      * multiqueue device.
				      */
#define TCQ_F_WARN_NONWC	(1 << 16)
#define TCQ_F_CPUSTATS		0x20 /* run using percpu statistics */
#define TCQ_F_NOPARENT		0x40 /* root of its hierarchy :
				      * qdisc_tree_decrease_qlen() should stop.
				      */
#define TCQ_F_INVISIBLE		0x80 /* invisible by default in dump */
#define TCQ_F_NOLOCK		0x100 /* qdisc does not require locking */
#define TCQ_F_OFFLOADED		0x200 /* qdisc is offloaded to HW */
	u32			limit;
	const struct Qdisc_ops	*ops;
	struct qdisc_size_table	__rcu *stab;
	struct hlist_node       hash;
	u32			handle;
	u32			parent;

	struct netdev_queue	*dev_queue;

	struct net_rate_estimator __rcu *rate_est;
	/* Per-cpu counters, used by the *_cpu_* helpers in this header. */
	struct gnet_stats_basic_sync __percpu *cpu_bstats;
	struct gnet_stats_queue	__percpu *cpu_qstats;
	int			pad;
	/* Not touched for TCQ_F_BUILTIN qdiscs (see qdisc_refcount_inc()). */
	refcount_t		refcnt;

	/*
	 * For performance sake on SMP, we put highly modified fields at the end
	 */
	struct sk_buff_head	gso_skb ____cacheline_aligned_in_smp;
	struct qdisc_skb_head	q;
	struct gnet_stats_basic_sync bstats;
	struct gnet_stats_queue	qstats;
	/* Bits from enum qdisc_state_t. */
	unsigned long		state;
	/* Bits from enum qdisc_state2_t. */
	unsigned long		state2; /* must be written under qdisc spinlock */
	struct Qdisc            *next_sched;
	struct sk_buff_head	skb_bad_txq;

	spinlock_t		busylock ____cacheline_aligned_in_smp;
	/* Taken with trylock by qdisc_run_begin() for TCQ_F_NOLOCK qdiscs. */
	spinlock_t		seqlock;

	struct rcu_head		rcu;
	netdevice_tracker	dev_tracker;
	/* private data */
	long privdata[] ____cacheline_aligned;
};
132
133static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
134{
135	if (qdisc->flags & TCQ_F_BUILTIN)
136		return;
137	refcount_inc(&qdisc->refcnt);
138}
139
140static inline bool qdisc_refcount_dec_if_one(struct Qdisc *qdisc)
141{
142	if (qdisc->flags & TCQ_F_BUILTIN)
143		return true;
144	return refcount_dec_if_one(&qdisc->refcnt);
145}
146
147/* Intended to be used by unlocked users, when concurrent qdisc release is
148 * possible.
149 */
150
151static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
152{
153	if (qdisc->flags & TCQ_F_BUILTIN)
154		return qdisc;
155	if (refcount_inc_not_zero(&qdisc->refcnt))
156		return qdisc;
157	return NULL;
158}
159
160/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
161 * root_lock section, or provide their own memory barriers -- ordering
162 * against qdisc_run_begin/end() atomic bit operations.
163 */
164static inline bool qdisc_is_running(struct Qdisc *qdisc)
165{
166	if (qdisc->flags & TCQ_F_NOLOCK)
167		return spin_is_locked(&qdisc->seqlock);
168	return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
169}
170
171static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
172{
173	return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY);
174}
175
176static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
177{
178	return q->flags & TCQ_F_CPUSTATS;
179}
180
181static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
182{
183	if (qdisc_is_percpu_stats(qdisc))
184		return nolock_qdisc_is_empty(qdisc);
185	return !READ_ONCE(qdisc->q.qlen);
186}
187
/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
 * the qdisc root lock acquired.
 *
 * Returns true when the caller became the runner of @qdisc: the seqlock
 * was taken (TCQ_F_NOLOCK) or the RUNNING bit was newly set (otherwise).
 * Returns false when the seqlock could not be taken or the RUNNING bit
 * was already set.
 */
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK) {
		/* Fast path: uncontended seqlock. */
		if (spin_trylock(&qdisc->seqlock))
			return true;

		/* No need to insist if the MISSED flag was already set.
		 * Note that test_and_set_bit() also gives us memory ordering
		 * guarantees wrt potential earlier enqueue() and below
		 * spin_trylock(), both of which are necessary to prevent races
		 */
		if (test_and_set_bit(__QDISC_STATE_MISSED, &qdisc->state))
			return false;

		/* Try to take the lock again to make sure that we will either
		 * grab it or the CPU that still has it will see MISSED set
		 * when testing it in qdisc_run_end()
		 */
		return spin_trylock(&qdisc->seqlock);
	}
	/* Non-atomic bitop: relies on the root lock mentioned above. */
	return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
}
213
/* Counterpart of qdisc_run_begin(): gives up the runner role.
 *
 * TCQ_F_NOLOCK: releases the seqlock and, if the MISSED bit is set in
 * state, calls __netif_schedule() on the qdisc.
 * Otherwise: clears the RUNNING bit of state2 with a non-atomic bitop.
 */
static inline void qdisc_run_end(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_NOLOCK) {
		spin_unlock(&qdisc->seqlock);

		/* spin_unlock() only has store-release semantic. The unlock
		 * and test_bit() ordering is a store-load ordering, so a full
		 * memory barrier is needed here.
		 */
		smp_mb();

		if (unlikely(test_bit(__QDISC_STATE_MISSED,
				      &qdisc->state)))
			__netif_schedule(qdisc);
	} else {
		__clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
	}
}
232
233static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
234{
235	return qdisc->flags & TCQ_F_ONETXQUEUE;
236}
237
/* Bulk-dequeue budget for @txq: dql_avail() of its dql member when
 * CONFIG_BQL is enabled, 0 otherwise.
 */
static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
{
	int avail = 0;

#ifdef CONFIG_BQL
	/* Non-BQL migrated drivers will return 0, too. */
	avail = dql_avail(&txq->dql);
#endif
	return avail;
}
247
/* Operations table for the class side of a qdisc. Qdisc_ops::cl_ops points
 * at one of these. Several callbacks take an `unsigned long` class
 * argument, the same type that ->find() returns.
 */
struct Qdisc_class_ops {
	/* Values from enum qdisc_class_ops_flags. */
	unsigned int		flags;
	/* Child qdisc manipulation */
	struct netdev_queue *	(*select_queue)(struct Qdisc *, struct tcmsg *);
	int			(*graft)(struct Qdisc *, unsigned long cl,
					struct Qdisc *, struct Qdisc **,
					struct netlink_ext_ack *extack);
	struct Qdisc *		(*leaf)(struct Qdisc *, unsigned long cl);
	void			(*qlen_notify)(struct Qdisc *, unsigned long);

	/* Class manipulation routines */
	unsigned long		(*find)(struct Qdisc *, u32 classid);
	int			(*change)(struct Qdisc *, u32, u32,
					struct nlattr **, unsigned long *,
					struct netlink_ext_ack *);
	int			(*delete)(struct Qdisc *, unsigned long,
					  struct netlink_ext_ack *);
	void			(*walk)(struct Qdisc *, struct qdisc_walker * arg);

	/* Filter manipulation */
	struct tcf_block *	(*tcf_block)(struct Qdisc *sch,
					     unsigned long arg,
					     struct netlink_ext_ack *extack);
	unsigned long		(*bind_tcf)(struct Qdisc *, unsigned long,
					u32 classid);
	void			(*unbind_tcf)(struct Qdisc *, unsigned long);

	/* rtnetlink specific */
	int			(*dump)(struct Qdisc *, unsigned long,
					struct sk_buff *skb, struct tcmsg*);
	int			(*dump_stats)(struct Qdisc *, unsigned long,
					struct gnet_dump *);
};
281
/* Qdisc_class_ops flag values (stored in Qdisc_class_ops::flags). */

/* Implements API that doesn't require rtnl lock */
enum qdisc_class_ops_flags {
	QDISC_CLASS_OPS_DOIT_UNLOCKED = 1,
};
288
/* Operations table describing one qdisc type. Struct Qdisc carries a
 * pointer to it (Qdisc::ops) as well as its own enqueue/dequeue pointers.
 */
struct Qdisc_ops {
	/* Singly linked list link; the list head is not in this header. */
	struct Qdisc_ops	*next;
	const struct Qdisc_class_ops	*cl_ops;
	char			id[IFNAMSIZ];
	/* NOTE(review): presumably the size of the Qdisc::privdata area
	 * requested by this type - confirm against qdisc_alloc().
	 */
	int			priv_size;
	unsigned int		static_flags;

	int 			(*enqueue)(struct sk_buff *skb,
					   struct Qdisc *sch,
					   struct sk_buff **to_free);
	struct sk_buff *	(*dequeue)(struct Qdisc *);
	struct sk_buff *	(*peek)(struct Qdisc *);

	int			(*init)(struct Qdisc *sch, struct nlattr *arg,
					struct netlink_ext_ack *extack);
	void			(*reset)(struct Qdisc *);
	void			(*destroy)(struct Qdisc *);
	int			(*change)(struct Qdisc *sch,
					  struct nlattr *arg,
					  struct netlink_ext_ack *extack);
	void			(*attach)(struct Qdisc *sch);
	int			(*change_tx_queue_len)(struct Qdisc *, unsigned int);
	void			(*change_real_num_tx)(struct Qdisc *sch,
						      unsigned int new_real_tx);

	int			(*dump)(struct Qdisc *, struct sk_buff *);
	int			(*dump_stats)(struct Qdisc *, struct gnet_dump *);

	void			(*ingress_block_set)(struct Qdisc *sch,
						     u32 block_index);
	void			(*egress_block_set)(struct Qdisc *sch,
						    u32 block_index);
	u32			(*ingress_block_get)(struct Qdisc *sch);
	u32			(*egress_block_get)(struct Qdisc *sch);

	struct module		*owner;
};
326
327
/* Output of a classifier's ->classify() callback. The two views share
 * storage: either a (class, classid) pair or a goto_tp pointer. Which one
 * is valid is not recorded in this struct.
 */
struct tcf_result {
	union {
		struct {
			unsigned long	class;
			u32		classid;
		};
		const struct tcf_proto *goto_tp;

	};
};
338
/* Forward declaration; the full definition appears further down. */
struct tcf_chain;

/* Operations table describing one classifier type. tcf_proto::ops and
 * tcf_chain::tmplt_ops point at instances of it.
 */
struct tcf_proto_ops {
	struct list_head	head;
	char			kind[IFNAMSIZ];

	int			(*classify)(struct sk_buff *,
					    const struct tcf_proto *,
					    struct tcf_result *);
	int			(*init)(struct tcf_proto*);
	void			(*destroy)(struct tcf_proto *tp, bool rtnl_held,
					   struct netlink_ext_ack *extack);

	void*			(*get)(struct tcf_proto*, u32 handle);
	void			(*put)(struct tcf_proto *tp, void *f);
	int			(*change)(struct net *net, struct sk_buff *,
					struct tcf_proto*, unsigned long,
					u32 handle, struct nlattr **,
					void **, u32,
					struct netlink_ext_ack *);
	int			(*delete)(struct tcf_proto *tp, void *arg,
					  bool *last, bool rtnl_held,
					  struct netlink_ext_ack *);
	/* Expected for classifiers that set TCF_PROTO_OPS_DOIT_UNLOCKED. */
	bool			(*delete_empty)(struct tcf_proto *tp);
	void			(*walk)(struct tcf_proto *tp,
					struct tcf_walker *arg, bool rtnl_held);
	int			(*reoffload)(struct tcf_proto *tp, bool add,
					     flow_setup_cb_t *cb, void *cb_priv,
					     struct netlink_ext_ack *extack);
	void			(*hw_add)(struct tcf_proto *tp,
					  void *type_data);
	void			(*hw_del)(struct tcf_proto *tp,
					  void *type_data);
	void			(*bind_class)(void *, u32, unsigned long,
					      void *, unsigned long);
	void *			(*tmplt_create)(struct net *net,
						struct tcf_chain *chain,
						struct nlattr **tca,
						struct netlink_ext_ack *extack);
	void			(*tmplt_destroy)(void *tmplt_priv);
	void			(*tmplt_reoffload)(struct tcf_chain *chain,
						   bool add,
						   flow_setup_cb_t *cb,
						   void *cb_priv);
	struct tcf_exts *	(*get_exts)(const struct tcf_proto *tp,
					    u32 handle);

	/* rtnetlink specific */
	int			(*dump)(struct net*, struct tcf_proto*, void *,
					struct sk_buff *skb, struct tcmsg*,
					bool);
	int			(*terse_dump)(struct net *net,
					      struct tcf_proto *tp, void *fh,
					      struct sk_buff *skb,
					      struct tcmsg *t, bool rtnl_held);
	int			(*tmplt_dump)(struct sk_buff *skb,
					      struct net *net,
					      void *tmplt_priv);

	struct module		*owner;
	/* Values from enum tcf_proto_ops_flags. */
	int			flags;
};
401
/* Flag values for tcf_proto_ops::flags.
 *
 * Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags
 * are expected to implement tcf_proto_ops->delete_empty(), otherwise race
 * conditions can occur when filters are inserted/deleted simultaneously.
 */
enum tcf_proto_ops_flags {
	TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
};
409
/* One classifier instance. Instances are linked through the __rcu `next`
 * pointer; tcf_chain::filter_chain points at such a list.
 */
struct tcf_proto {
	/* Fast access part */
	struct tcf_proto __rcu	*next;
	void __rcu		*root;

	/* called under RCU BH lock */
	int			(*classify)(struct sk_buff *,
					    const struct tcf_proto *,
					    struct tcf_result *);
	__be16			protocol;

	/* All the rest */
	u32			prio;
	void			*data;
	const struct tcf_proto_ops	*ops;
	struct tcf_chain	*chain;
	/* Lock protects tcf_proto shared state and can be used by unlocked
	 * classifiers to protect their private data.
	 */
	spinlock_t		lock;
	bool			deleting;
	refcount_t		refcnt;
	struct rcu_head		rcu;
	/* NOTE(review): presumably the link into
	 * tcf_block::proto_destroy_ht - confirm.
	 */
	struct hlist_node	destroy_ht_node;
};
435
/* Layout that qdisc_skb_cb() overlays on skb->cb: a fixed header followed
 * by QDISC_CB_PRIV_LEN bytes of private data. qdisc_cb_private_validate()
 * checks at build time that this fits in skb->cb.
 */
struct qdisc_skb_cb {
	struct {
		/* Returned by qdisc_pkt_len(). */
		unsigned int		pkt_len;
		u16			slave_dev_queue_mapping;
		u16			tc_classid;
	};
#define QDISC_CB_PRIV_LEN 20
	unsigned char		data[QDISC_CB_PRIV_LEN];
};
445
/* Function type taking a chain head and an opaque @priv pointer. */
typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
447
/* A chain of classifiers belonging to a tcf_block. Per the tcf_block
 * comment, refcnt, action_refcnt and explicitly_created are protected by
 * the owning block's lock.
 */
struct tcf_chain {
	/* Protects filter_chain. */
	struct mutex filter_chain_lock;
	struct tcf_proto __rcu *filter_chain;
	struct list_head list;
	struct tcf_block *block;
	u32 index; /* chain index */
	unsigned int refcnt;
	unsigned int action_refcnt;
	bool explicitly_created;
	bool flushing;
	const struct tcf_proto_ops *tmplt_ops;
	void *tmplt_priv;
	struct rcu_head rcu;
};
463
/* A block of filter chains. */
struct tcf_block {
	/* Lock protects tcf_block and lifetime-management data of chains
	 * attached to the block (refcnt, action_refcnt, explicitly_created).
	 */
	struct mutex lock;
	struct list_head chain_list;
	u32 index; /* block index for shared blocks */
	u32 classid; /* which class this block belongs to */
	refcount_t refcnt;
	struct net *net;
	struct Qdisc *q;
	struct rw_semaphore cb_lock; /* protects cb_list and offload counters */
	struct flow_block flow_block;
	struct list_head owner_list;
	bool keep_dst;
	atomic_t offloadcnt; /* Number of offloaded filters */
	unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
	unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
	struct {
		struct tcf_chain *chain;
		struct list_head filter_chain_list;
	} chain0;
	struct rcu_head rcu;
	DECLARE_HASHTABLE(proto_destroy_ht, 7);
	struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};
490
491static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
492{
493	return lockdep_is_held(&chain->filter_chain_lock);
494}
495
496static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
497{
498	return lockdep_is_held(&tp->lock);
499}
500
/* Dereference the __rcu pointer @p on the update side, asserting (via
 * lockdep) that @chain's filter_chain_lock is held.
 */
#define tcf_chain_dereference(p, chain)					\
	rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain))

/* Same as above, but the asserted lock is @tp's spinlock. */
#define tcf_proto_dereference(p, tp)					\
	rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp))
506
507static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
508{
509	struct qdisc_skb_cb *qcb;
510
511	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb));
512	BUILD_BUG_ON(sizeof(qcb->data) < sz);
513}
514
515static inline int qdisc_qlen(const struct Qdisc *q)
516{
517	return q->q.qlen;
518}
519
520static inline int qdisc_qlen_sum(const struct Qdisc *q)
521{
522	__u32 qlen = q->qstats.qlen;
523	int i;
524
525	if (qdisc_is_percpu_stats(q)) {
526		for_each_possible_cpu(i)
527			qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
528	} else {
529		qlen += q->q.qlen;
530	}
531
532	return qlen;
533}
534
535static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
536{
537	return (struct qdisc_skb_cb *)skb->cb;
538}
539
540static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc)
541{
542	return &qdisc->q.lock;
543}
544
545static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
546{
547	struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc);
548
549	return q;
550}
551
552static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc)
553{
554	return rcu_dereference_bh(qdisc->dev_queue->qdisc);
555}
556
557static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
558{
559	return rcu_dereference_rtnl(qdisc->dev_queue->qdisc_sleeping);
560}
561
562static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
563{
564	struct Qdisc *root = qdisc_root_sleeping(qdisc);
565
566	ASSERT_RTNL();
567	return qdisc_lock(root);
568}
569
570static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
571{
572	return qdisc->dev_queue->dev;
573}
574
575static inline void sch_tree_lock(struct Qdisc *q)
576{
577	if (q->flags & TCQ_F_MQROOT)
578		spin_lock_bh(qdisc_lock(q));
579	else
580		spin_lock_bh(qdisc_root_sleeping_lock(q));
581}
582
583static inline void sch_tree_unlock(struct Qdisc *q)
584{
585	if (q->flags & TCQ_F_MQROOT)
586		spin_unlock_bh(qdisc_lock(q));
587	else
588		spin_unlock_bh(qdisc_root_sleeping_lock(q));
589}
590
/* Objects defined outside this header: the noop qdisc instance, several
 * qdisc operation tables, and the pointer to the current default ops.
 */
extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
extern struct Qdisc_ops mq_qdisc_ops;
extern struct Qdisc_ops noqueue_qdisc_ops;
extern const struct Qdisc_ops *default_qdisc_ops;
597static inline const struct Qdisc_ops *
598get_default_qdisc_ops(const struct net_device *dev, int ntx)
599{
600	return ntx < dev->real_num_tx_queues ?
601			default_qdisc_ops : &pfifo_fast_ops;
602}
603
/* Common part of a class entry stored in a Qdisc_class_hash. */
struct Qdisc_class_common {
	/* Lookup key used by qdisc_class_find(). */
	u32			classid;
	/* Adjusted by qdisc_class_get()/qdisc_class_put(); non-zero means
	 * "in use" per qdisc_class_in_use().
	 */
	unsigned int		filter_cnt;
	struct hlist_node	hnode;
};
609
/* Hash table of Qdisc_class_common entries, keyed by classid. */
struct Qdisc_class_hash {
	/* Array of buckets, indexed by qdisc_class_hash(id, hashmask). */
	struct hlist_head	*hash;
	unsigned int		hashsize;
	unsigned int		hashmask;
	unsigned int		hashelems;
};
616
617static inline unsigned int qdisc_class_hash(u32 id, u32 mask)
618{
619	id ^= id >> 8;
620	id ^= id >> 4;
621	return id & mask;
622}
623
624static inline struct Qdisc_class_common *
625qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
626{
627	struct Qdisc_class_common *cl;
628	unsigned int h;
629
630	if (!id)
631		return NULL;
632
633	h = qdisc_class_hash(id, hash->hashmask);
634	hlist_for_each_entry(cl, &hash->hash[h], hnode) {
635		if (cl->classid == id)
636			return cl;
637	}
638	return NULL;
639}
640
641static inline bool qdisc_class_in_use(const struct Qdisc_class_common *cl)
642{
643	return cl->filter_cnt > 0;
644}
645
646static inline void qdisc_class_get(struct Qdisc_class_common *cl)
647{
648	unsigned int res;
649
650	if (check_add_overflow(cl->filter_cnt, 1, &res))
651		WARN(1, "Qdisc class overflow");
652
653	cl->filter_cnt = res;
654}
655
656static inline void qdisc_class_put(struct Qdisc_class_common *cl)
657{
658	unsigned int res;
659
660	if (check_sub_overflow(cl->filter_cnt, 1, &res))
661		WARN(1, "Qdisc class underflow");
662
663	cl->filter_cnt = res;
664}
665
666static inline int tc_classid_to_hwtc(struct net_device *dev, u32 classid)
667{
668	u32 hwtc = TC_H_MIN(classid) - TC_H_MIN_PRIORITY;
669
670	return (hwtc < netdev_get_num_tc(dev)) ? hwtc : -EINVAL;
671}
672
/* Qdisc_class_hash management; prototypes only, the definitions are not in
 * this header.
 */
int qdisc_class_hash_init(struct Qdisc_class_hash *);
void qdisc_class_hash_insert(struct Qdisc_class_hash *,
			     struct Qdisc_class_common *);
void qdisc_class_hash_remove(struct Qdisc_class_hash *,
			     struct Qdisc_class_common *);
void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *);
void qdisc_class_hash_destroy(struct Qdisc_class_hash *);
680
/* Device-level scheduler control and qdisc lifetime entry points;
 * prototypes only, the definitions are not in this header.
 */
int dev_qdisc_change_tx_queue_len(struct net_device *dev);
void dev_qdisc_change_real_num_tx(struct net_device *dev,
				  unsigned int new_real_tx);
void dev_init_scheduler(struct net_device *dev);
void dev_shutdown(struct net_device *dev);
void dev_activate(struct net_device *dev);
void dev_deactivate(struct net_device *dev);
void dev_deactivate_many(struct list_head *head);
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc);
void qdisc_reset(struct Qdisc *qdisc);
void qdisc_destroy(struct Qdisc *qdisc);
void qdisc_put(struct Qdisc *qdisc);
void qdisc_put_unlocked(struct Qdisc *qdisc);
void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len);
/* Offload helpers. With CONFIG_NET_SCHED they are real functions defined
 * elsewhere; without it the inline stubs below are used.
 */
#ifdef CONFIG_NET_SCHED
int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
			      void *type_data);
void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack);
#else
/* Stub: clears TCQ_F_OFFLOADED on @q and reports success. */
static inline int
qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
			  void *type_data)
{
	q->flags &= ~TCQ_F_OFFLOADED;
	return 0;
}

/* Stub: does nothing. */
static inline void
qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
			   struct Qdisc *new, struct Qdisc *old,
			   enum tc_setup_type type, void *type_data,
			   struct netlink_ext_ack *extack)
{
}
#endif
/* Offload capability query, qdisc allocation/creation/free, packet length
 * calculation and redirect; prototypes only, the definitions are not in
 * this header.
 */
void qdisc_offload_query_caps(struct net_device *dev,
			      enum tc_setup_type type,
			      void *caps, size_t caps_len);
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  const struct Qdisc_ops *ops,
			  struct netlink_ext_ack *extack);
void qdisc_free(struct Qdisc *qdisc);
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
				const struct Qdisc_ops *ops, u32 parentid,
				struct netlink_ext_ack *extack);
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);
733
734static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
735{
736#ifdef CONFIG_NET_XGRESS
737	return skb->tc_at_ingress;
738#else
739	return false;
740#endif
741}
742
743static inline bool skb_skip_tc_classify(struct sk_buff *skb)
744{
745#ifdef CONFIG_NET_CLS_ACT
746	if (skb->tc_skip_classify) {
747		skb->tc_skip_classify = 0;
748		return true;
749	}
750#endif
751	return false;
752}
753
754/* Reset all TX qdiscs greater than index of a device.  */
755static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
756{
757	struct Qdisc *qdisc;
758
759	for (; i < dev->num_tx_queues; i++) {
760		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc);
761		if (qdisc) {
762			spin_lock_bh(qdisc_lock(qdisc));
763			qdisc_reset(qdisc);
764			spin_unlock_bh(qdisc_lock(qdisc));
765		}
766	}
767}
768
769/* Are all TX queues of the device empty?  */
770static inline bool qdisc_all_tx_empty(const struct net_device *dev)
771{
772	unsigned int i;
773
774	rcu_read_lock();
775	for (i = 0; i < dev->num_tx_queues; i++) {
776		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
777		const struct Qdisc *q = rcu_dereference(txq->qdisc);
778
779		if (!qdisc_is_empty(q)) {
780			rcu_read_unlock();
781			return false;
782		}
783	}
784	rcu_read_unlock();
785	return true;
786}
787
788/* Are any of the TX qdiscs changing?  */
789static inline bool qdisc_tx_changing(const struct net_device *dev)
790{
791	unsigned int i;
792
793	for (i = 0; i < dev->num_tx_queues; i++) {
794		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
795
796		if (rcu_access_pointer(txq->qdisc) !=
797		    rcu_access_pointer(txq->qdisc_sleeping))
798			return true;
799	}
800	return false;
801}
802
803/* Is the device using the noop qdisc on all queues?  */
804static inline bool qdisc_tx_is_noop(const struct net_device *dev)
805{
806	unsigned int i;
807
808	for (i = 0; i < dev->num_tx_queues; i++) {
809		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
810		if (rcu_access_pointer(txq->qdisc) != &noop_qdisc)
811			return false;
812	}
813	return true;
814}
815
816static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
817{
818	return qdisc_skb_cb(skb)->pkt_len;
819}
820
/* Additional qdisc xmit flags (NET_XMIT_MASK in linux/netdevice.h).
 * net_xmit_drop_count() tests __NET_XMIT_STOLEN.
 */
enum net_xmit_qdisc_t {
	__NET_XMIT_STOLEN = 0x00010000,
	__NET_XMIT_BYPASS = 0x00020000,
};
826
/* Number of drops to account for xmit result @e: 0 when the
 * __NET_XMIT_STOLEN bit is set (CONFIG_NET_CLS_ACT only), 1 otherwise.
 */
#ifdef CONFIG_NET_CLS_ACT
#define net_xmit_drop_count(e)	((e) & __NET_XMIT_STOLEN ? 0 : 1)
#else
#define net_xmit_drop_count(e)	(1)
#endif
832
/* If @sch has a size table attached, let __qdisc_calculate_pkt_len()
 * process @skb with it. Does nothing without CONFIG_NET_SCHED or when no
 * table is set.
 */
static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
					   const struct Qdisc *sch)
{
#ifdef CONFIG_NET_SCHED
	struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);

	if (!stab)
		return;

	__qdisc_calculate_pkt_len(skb, stab);
#endif
}
843
844static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
845				struct sk_buff **to_free)
846{
847	qdisc_calculate_pkt_len(skb, sch);
848	return sch->enqueue(skb, sch, to_free);
849}
850
/* Add @bytes and @packets to @bstats. Both additions happen inside one
 * u64_stats_update_begin()/end() section on bstats->syncp.
 */
static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
				  __u64 bytes, __u32 packets)
{
	u64_stats_update_begin(&bstats->syncp);
	u64_stats_add(&bstats->bytes, bytes);
	u64_stats_add(&bstats->packets, packets);
	u64_stats_update_end(&bstats->syncp);
}
859
860static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
861				 const struct sk_buff *skb)
862{
863	_bstats_update(bstats,
864		       qdisc_pkt_len(skb),
865		       skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
866}
867
/* Account @skb in the current CPU's slot of @sch's per-cpu basic stats. */
static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
					   const struct sk_buff *skb)
{
	bstats_update(this_cpu_ptr(sch->cpu_bstats), skb);
}
873
/* Account @skb in @sch's embedded (non per-cpu) basic stats. */
static inline void qdisc_bstats_update(struct Qdisc *sch,
				       const struct sk_buff *skb)
{
	bstats_update(&sch->bstats, skb);
}
879
880static inline void qdisc_qstats_backlog_dec(struct Qdisc *sch,
881					    const struct sk_buff *skb)
882{
883	sch->qstats.backlog -= qdisc_pkt_len(skb);
884}
885
886static inline void qdisc_qstats_cpu_backlog_dec(struct Qdisc *sch,
887						const struct sk_buff *skb)
888{
889	this_cpu_sub(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
890}
891
892static inline void qdisc_qstats_backlog_inc(struct Qdisc *sch,
893					    const struct sk_buff *skb)
894{
895	sch->qstats.backlog += qdisc_pkt_len(skb);
896}
897
898static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
899						const struct sk_buff *skb)
900{
901	this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
902}
903
/* Increment the current CPU's qlen counter of @sch. */
static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->qlen);
}
908
/* Decrement the current CPU's qlen counter of @sch. */
static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
{
	this_cpu_dec(sch->cpu_qstats->qlen);
}
913
/* Increment the current CPU's requeues counter of @sch. */
static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->requeues);
}
918
919static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
920{
921	sch->qstats.drops += count;
922}
923
924static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
925{
926	qstats->drops++;
927}
928
929static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats)
930{
931	qstats->overlimits++;
932}
933
934static inline void qdisc_qstats_drop(struct Qdisc *sch)
935{
936	qstats_drop_inc(&sch->qstats);
937}
938
/* Increment the current CPU's drops counter of @sch. */
static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
{
	this_cpu_inc(sch->cpu_qstats->drops);
}
943
944static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
945{
946	sch->qstats.overlimits++;
947}
948
949static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch)
950{
951	__u32 qlen = qdisc_qlen_sum(sch);
952
953	return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen);
954}
955
956static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch,  __u32 *qlen,
957					     __u32 *backlog)
958{
959	struct gnet_stats_queue qstats = { 0 };
960
961	gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats);
962	*qlen = qstats.qlen + qdisc_qlen(sch);
963	*backlog = qstats.backlog;
964}
965
966static inline void qdisc_tree_flush_backlog(struct Qdisc *sch)
967{
968	__u32 qlen, backlog;
969
970	qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
971	qdisc_tree_reduce_backlog(sch, qlen, backlog);
972}
973
974static inline void qdisc_purge_queue(struct Qdisc *sch)
975{
976	__u32 qlen, backlog;
977
978	qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
979	qdisc_reset(sch);
980	qdisc_tree_reduce_backlog(sch, qlen, backlog);
981}
982
983static inline void __qdisc_enqueue_tail(struct sk_buff *skb,
984					struct qdisc_skb_head *qh)
985{
986	struct sk_buff *last = qh->tail;
987
988	if (last) {
989		skb->next = NULL;
990		last->next = skb;
991		qh->tail = skb;
992	} else {
993		qh->tail = skb;
994		qh->head = skb;
995	}
996	qh->qlen++;
997}
998
/* Append @skb to @sch's own packet list and add its length to the
 * embedded backlog counter. Always returns NET_XMIT_SUCCESS.
 */
static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch)
{
	__qdisc_enqueue_tail(skb, &sch->q);
	qdisc_qstats_backlog_inc(sch, skb);
	return NET_XMIT_SUCCESS;
}
1005
1006static inline void __qdisc_enqueue_head(struct sk_buff *skb,
1007					struct qdisc_skb_head *qh)
1008{
1009	skb->next = qh->head;
1010
1011	if (!qh->head)
1012		qh->tail = skb;
1013	qh->head = skb;
1014	qh->qlen++;
1015}
1016
1017static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
1018{
1019	struct sk_buff *skb = qh->head;
1020
1021	if (likely(skb != NULL)) {
1022		qh->head = skb->next;
1023		qh->qlen--;
1024		if (qh->head == NULL)
1025			qh->tail = NULL;
1026		skb->next = NULL;
1027	}
1028
1029	return skb;
1030}
1031
1032static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
1033{
1034	struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
1035
1036	if (likely(skb != NULL)) {
1037		qdisc_qstats_backlog_dec(sch, skb);
1038		qdisc_bstats_update(sch, skb);
1039	}
1040
1041	return skb;
1042}
1043
1044/* Instead of calling kfree_skb() while root qdisc lock is held,
1045 * queue the skb for future freeing at end of __dev_xmit_skb()
1046 */
1047static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
1048{
1049	skb->next = *to_free;
1050	*to_free = skb;
1051}
1052
1053static inline void __qdisc_drop_all(struct sk_buff *skb,
1054				    struct sk_buff **to_free)
1055{
1056	if (skb->prev)
1057		skb->prev->next = *to_free;
1058	else
1059		skb->next = *to_free;
1060	*to_free = skb;
1061}
1062
/* Drop the first packet of @qh: remove it, subtract its length from
 * @sch's backlog and queue it on *@to_free.
 *
 * Returns the dropped packet's length, or 0 when @qh was empty.
 */
static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
						   struct qdisc_skb_head *qh,
						   struct sk_buff **to_free)
{
	struct sk_buff *victim = __qdisc_dequeue_head(qh);
	unsigned int len = 0;

	if (likely(victim)) {
		/* Read the length before the skb is handed over. */
		len = qdisc_pkt_len(victim);

		qdisc_qstats_backlog_dec(sch, victim);
		__qdisc_drop(victim, to_free);
	}

	return len;
}
1079
1080static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
1081{
1082	const struct qdisc_skb_head *qh = &sch->q;
1083
1084	return qh->head;
1085}
1086
1087/* generic pseudo peek method for non-work-conserving qdisc */
1088static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch)
1089{
1090	struct sk_buff *skb = skb_peek(&sch->gso_skb);
1091
1092	/* we can reuse ->gso_skb because peek isn't called for root qdiscs */
1093	if (!skb) {
1094		skb = sch->dequeue(sch);
1095
1096		if (skb) {
1097			__skb_queue_head(&sch->gso_skb, skb);
1098			/* it's still part of the queue */
1099			qdisc_qstats_backlog_inc(sch, skb);
1100			sch->q.qlen++;
1101		}
1102	}
1103
1104	return skb;
1105}
1106
1107static inline void qdisc_update_stats_at_dequeue(struct Qdisc *sch,
1108						 struct sk_buff *skb)
1109{
1110	if (qdisc_is_percpu_stats(sch)) {
1111		qdisc_qstats_cpu_backlog_dec(sch, skb);
1112		qdisc_bstats_cpu_update(sch, skb);
1113		qdisc_qstats_cpu_qlen_dec(sch);
1114	} else {
1115		qdisc_qstats_backlog_dec(sch, skb);
1116		qdisc_bstats_update(sch, skb);
1117		sch->q.qlen--;
1118	}
1119}
1120
1121static inline void qdisc_update_stats_at_enqueue(struct Qdisc *sch,
1122						 unsigned int pkt_len)
1123{
1124	if (qdisc_is_percpu_stats(sch)) {
1125		qdisc_qstats_cpu_qlen_inc(sch);
1126		this_cpu_add(sch->cpu_qstats->backlog, pkt_len);
1127	} else {
1128		sch->qstats.backlog += pkt_len;
1129		sch->q.qlen++;
1130	}
1131}
1132
1133/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */
1134static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch)
1135{
1136	struct sk_buff *skb = skb_peek(&sch->gso_skb);
1137
1138	if (skb) {
1139		skb = __skb_dequeue(&sch->gso_skb);
1140		if (qdisc_is_percpu_stats(sch)) {
1141			qdisc_qstats_cpu_backlog_dec(sch, skb);
1142			qdisc_qstats_cpu_qlen_dec(sch);
1143		} else {
1144			qdisc_qstats_backlog_dec(sch, skb);
1145			sch->q.qlen--;
1146		}
1147	} else {
1148		skb = sch->dequeue(sch);
1149	}
1150
1151	return skb;
1152}
1153
1154static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
1155{
1156	/*
1157	 * We do not know the backlog in bytes of this list, it
1158	 * is up to the caller to correct it
1159	 */
1160	ASSERT_RTNL();
1161	if (qh->qlen) {
1162		rtnl_kfree_skbs(qh->head, qh->tail);
1163
1164		qh->head = NULL;
1165		qh->tail = NULL;
1166		qh->qlen = 0;
1167	}
1168}
1169
1170static inline void qdisc_reset_queue(struct Qdisc *sch)
1171{
1172	__qdisc_reset_queue(&sch->q);
1173}
1174
/* Install @new in the slot *@pold and return the qdisc that was there
 * before (may be NULL).
 *
 * The swap is done between sch_tree_lock(@sch) and sch_tree_unlock(@sch);
 * a previous occupant has qdisc_purge_queue() called on it before the
 * lock is released.
 */
static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
					  struct Qdisc **pold)
{
	struct Qdisc *prev;

	sch_tree_lock(sch);

	prev = *pold;
	*pold = new;
	if (prev)
		qdisc_purge_queue(prev);

	sch_tree_unlock(sch);

	return prev;
}
1189
/* Drop a single @skb on behalf of @sch: the skb is passed to
 * rtnl_kfree_skbs() as both the first and the last element, and the drop
 * is then recorded with qdisc_qstats_drop().
 */
static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
{
	struct sk_buff *only = skb;

	rtnl_kfree_skbs(only, only);
	qdisc_qstats_drop(sch);
}
1195
1196static inline int qdisc_drop_cpu(struct sk_buff *skb, struct Qdisc *sch,
1197				 struct sk_buff **to_free)
1198{
1199	__qdisc_drop(skb, to_free);
1200	qdisc_qstats_cpu_drop(sch);
1201
1202	return NET_XMIT_DROP;
1203}
1204
1205static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
1206			     struct sk_buff **to_free)
1207{
1208	__qdisc_drop(skb, to_free);
1209	qdisc_qstats_drop(sch);
1210
1211	return NET_XMIT_DROP;
1212}
1213
1214static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch,
1215				 struct sk_buff **to_free)
1216{
1217	__qdisc_drop_all(skb, to_free);
1218	qdisc_qstats_drop(sch);
1219
1220	return NET_XMIT_DROP;
1221}
1222
/* Rate configuration used by psched_l2t_ns() to turn a packet length
 * into a time value: (length * mult) >> shift, after the length has been
 * adjusted by overhead, mpu and linklayer.
 */
struct psched_ratecfg {
	u64	rate_bytes_ps; /* bytes per second */
	u32	mult;		/* multiplier applied to the adjusted length */
	u16	overhead;	/* added to every packet length */
	u16	mpu;		/* lower bound for the adjusted length */
	u8	linklayer;	/* compared against TC_LINKLAYER_ATM */
	u8	shift;		/* right shift applied after the multiply */
};
1231
1232static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
1233				unsigned int len)
1234{
1235	len += r->overhead;
1236
1237	if (len < r->mpu)
1238		len = r->mpu;
1239
1240	if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
1241		return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
1242
1243	return ((u64)len * r->mult) >> r->shift;
1244}
1245
/* Defined out of line.  Presumably fills @r (including the mult/shift
 * pair consumed by psched_l2t_ns()) from @conf and the 64-bit rate
 * @rate64 -- confirm against the implementation.
 */
void psched_ratecfg_precompute(struct psched_ratecfg *r,
			       const struct tc_ratespec *conf,
			       u64 rate64);
1249
1250static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
1251					  const struct psched_ratecfg *r)
1252{
1253	memset(res, 0, sizeof(*res));
1254
1255	/* legacy struct tc_ratespec has a 32bit @rate field
1256	 * Qdisc using 64bit rate should add new attributes
1257	 * in order to maintain compatibility.
1258	 */
1259	res->rate = min_t(u64, r->rate_bytes_ps, ~0U);
1260
1261	res->overhead = r->overhead;
1262	res->mpu = r->mpu;
1263	res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
1264}
1265
/* Packet-rate configuration used by psched_pkt2t_ns() to turn a packet
 * count into a time value: (count * mult) >> shift.
 */
struct psched_pktrate {
	u64	rate_pkts_ps; /* packets per second */
	u32	mult;		/* multiplier applied to the packet count */
	u8	shift;		/* right shift applied after the multiply */
};
1271
1272static inline u64 psched_pkt2t_ns(const struct psched_pktrate *r,
1273				  unsigned int pkt_num)
1274{
1275	return ((u64)pkt_num * r->mult) >> r->shift;
1276}
1277
/* Defined out of line.  Presumably fills @r (including the mult/shift
 * pair consumed by psched_pkt2t_ns()) from the 64-bit packet rate
 * @pktrate64 -- confirm against the implementation.
 */
void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);
1279
/* Mini Qdisc serves the specific needs of the ingress/clsact Qdisc.
 * The fast path only needs to access the filter list and to update stats.
 */
struct mini_Qdisc {
	struct tcf_proto *filter_list;	/* filter list read by the fast path */
	struct tcf_block *block;
	/* per-cpu stats updated by mini_qdisc_bstats_cpu_update() */
	struct gnet_stats_basic_sync __percpu *cpu_bstats;
	/* per-cpu stats; ->drops is bumped by mini_qdisc_qstats_cpu_drop() */
	struct gnet_stats_queue	__percpu *cpu_qstats;
	/* NOTE(review): not used in this header; presumably RCU grace-period
	 * state kept by the mini_qdisc_pair_*() code -- confirm.
	 */
	unsigned long rcu_state;
};
1290
1291static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
1292						const struct sk_buff *skb)
1293{
1294	bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb);
1295}
1296
/* Count one drop on @miniq: increments the drops field of the current
 * CPU's instance of miniq->cpu_qstats.
 */
static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
{
	this_cpu_inc(miniq->cpu_qstats->drops);
}
1301
/* Two mini_Qdisc instances plus the location of an RCU-annotated pointer
 * to a mini_Qdisc.
 * NOTE(review): presumably mini_qdisc_pair_swap() alternates *p_miniq
 * between miniq1 and miniq2 -- confirm against the implementation.
 */
struct mini_Qdisc_pair {
	struct mini_Qdisc miniq1;
	struct mini_Qdisc miniq2;
	struct mini_Qdisc __rcu **p_miniq;
};
1307
/* mini_Qdisc_pair management, defined out of line.
 * NOTE(review): semantics are not visible from this header; judging by
 * the names, _swap publishes a new filter list head @tp_head, _init binds
 * the pair to @qdisc and to the pointer slot @p_miniq, and _block_init
 * records @block -- confirm against the implementation.
 */
void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
			  struct tcf_proto *tp_head);
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
			  struct mini_Qdisc __rcu **p_miniq);
void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
				struct tcf_block *block);
1314
/* Defined out of line.  NOTE(review): presumably lets the mq qdisc @sch
 * react to a change of the real number of TX queues to @new_real_tx --
 * confirm against the implementation.
 */
void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx);
1316
/* Defined out of line.  Takes @skb and a transmit callback @xmit.
 * NOTE(review): presumably fragments @skb where needed before passing it
 * to @xmit -- confirm against the implementation.
 */
int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb));
1318
1319/* Make sure qdisc is no longer in SCHED state. */
1320static inline void qdisc_synchronize(const struct Qdisc *q)
1321{
1322	while (test_bit(__QDISC_STATE_SCHED, &q->state))
1323		msleep(1);
1324}
1325
1326#endif
1327