// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
#include <linux/skb_array.h>
#include <linux/if_macvlan.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <trace/events/qdisc.h>
#include <trace/events/net.h>
#include <net/xfrm.h>

/* Qdisc to use by default */
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
EXPORT_SYMBOL(default_qdisc_ops);

static void qdisc_maybe_clear_missed(struct Qdisc *q,
				     const struct netdev_queue *txq)
{
	clear_bit(__QDISC_STATE_MISSED, &q->state);

	/* Make sure the netif_xmit_frozen_or_stopped() check below
	 * happens after clearing STATE_MISSED.
	 */
	smp_mb__after_atomic();

	/* Check netif_xmit_frozen_or_stopped() again to make sure
	 * STATE_MISSED is set again in case the STATE_MISSED set by
	 * netif_tx_wake_queue()'s rescheduling of net_tx_action()
	 * was cleared by the clear_bit() above.
	 */
	if (!netif_xmit_frozen_or_stopped(txq))
		set_bit(__QDISC_STATE_MISSED, &q->state);
}
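
/* Illustrative sketch (added for clarity, not part of the original source) of
 * the interleaving that the comments above describe, assuming a NOLOCK qdisc:
 *
 *   dequeue path                        netif_tx_wake_queue() path
 *   ------------                        --------------------------
 *   sees txq stopped, requeues
 *                                       wakes txq, STATE_MISSED gets set
 *   clear_bit(STATE_MISSED)
 *   smp_mb__after_atomic()
 *   txq no longer stopped
 *     -> set_bit(STATE_MISSED)
 *
 * Re-checking netif_xmit_frozen_or_stopped() after the clear_bit() keeps that
 * wakeup from being lost.
 */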

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * the qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue and dequeue are serialized via the qdisc root lock
 * - ingress filtering is also serialized via the qdisc root lock
 * - updates to the tree and tree walking are only done under the rtnl mutex.
 */

#define SKB_XOFF_MAGIC ((struct sk_buff *)1UL)

static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
{
	const struct netdev_queue *txq = q->dev_queue;
	spinlock_t *lock = NULL;
	struct sk_buff *skb;

	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	skb = skb_peek(&q->skb_bad_txq);
	if (skb) {
		/* check the reason for requeuing without the tx lock first */
		txq = skb_get_tx_queue(txq->dev, skb);
		if (!netif_xmit_frozen_or_stopped(txq)) {
			skb = __skb_dequeue(&q->skb_bad_txq);
			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_dec(q, skb);
				qdisc_qstats_cpu_qlen_dec(q);
			} else {
				qdisc_qstats_backlog_dec(q, skb);
				q->q.qlen--;
			}
		} else {
			skb = SKB_XOFF_MAGIC;
			qdisc_maybe_clear_missed(q, txq);
		}
	}

	if (lock)
		spin_unlock(lock);

	return skb;
}

static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
{
	struct sk_buff *skb = skb_peek(&q->skb_bad_txq);

	if (unlikely(skb))
		skb = __skb_dequeue_bad_txq(q);

	return skb;
}

static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
					     struct sk_buff *skb)
{
	spinlock_t *lock = NULL;

	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	__skb_queue_tail(&q->skb_bad_txq, skb);

	if (qdisc_is_percpu_stats(q)) {
		qdisc_qstats_cpu_backlog_inc(q, skb);
		qdisc_qstats_cpu_qlen_inc(q);
	} else {
		qdisc_qstats_backlog_inc(q, skb);
		q->q.qlen++;
	}

	if (lock)
		spin_unlock(lock);
}

static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	spinlock_t *lock = NULL;

	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	while (skb) {
		struct sk_buff *next = skb->next;

		__skb_queue_tail(&q->gso_skb, skb);

		/* it's still part of the queue */
		if (qdisc_is_percpu_stats(q)) {
			qdisc_qstats_cpu_requeues_inc(q);
			qdisc_qstats_cpu_backlog_inc(q, skb);
			qdisc_qstats_cpu_qlen_inc(q);
		} else {
			q->qstats.requeues++;
			qdisc_qstats_backlog_inc(q, skb);
			q->q.qlen++;
		}

		skb = next;
	}
	if (lock)
		spin_unlock(lock);
	__netif_schedule(q);
}

static void try_bulk_dequeue_skb(struct Qdisc *q,
				 struct sk_buff *skb,
				 const struct netdev_queue *txq,
				 int *packets)
{
	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;

	while (bytelimit > 0) {
		struct sk_buff *nskb = q->dequeue(q);

		if (!nskb)
			break;

		bytelimit -= nskb->len; /* covers GSO len */
		skb->next = nskb;
		skb = nskb;
		(*packets)++; /* GSO counts as one pkt */
	}
	skb_mark_not_on_list(skb);
}
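
/* Illustrative note (added, not from the original source): the bulk budget
 * comes from qdisc_avail_bulklimit(), which reflects how much the BQL layer
 * of this txq can still accept.  For example, with roughly 20 kB of budget
 * left and 1500-byte packets, up to about 13 more skbs may be chained in one
 * go; a large GSO skb eats its full length from the byte budget but still
 * counts as a single packet toward the caller's *packets quota.
 */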

/* This variant of try_bulk_dequeue_skb() makes sure
 * all skbs in the chain are for the same txq
 */
static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
				      struct sk_buff *skb,
				      int *packets)
{
	int mapping = skb_get_queue_mapping(skb);
	struct sk_buff *nskb;
	int cnt = 0;

	do {
		nskb = q->dequeue(q);
		if (!nskb)
			break;
		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
			qdisc_enqueue_skb_bad_txq(q, nskb);
			break;
		}
		skb->next = nskb;
		skb = nskb;
	} while (++cnt < 8);
	(*packets) += cnt;
	skb_mark_not_on_list(skb);
}

/* Note that dequeue_skb() can possibly return an SKB list (via skb->next).
 * A requeued skb (via q->gso_skb) can also be an SKB list.
 */
static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
				   int *packets)
{
	const struct netdev_queue *txq = q->dev_queue;
	struct sk_buff *skb = NULL;

	*packets = 1;
	if (unlikely(!skb_queue_empty(&q->gso_skb))) {
		spinlock_t *lock = NULL;

		if (q->flags & TCQ_F_NOLOCK) {
			lock = qdisc_lock(q);
			spin_lock(lock);
		}

		skb = skb_peek(&q->gso_skb);

		/* skb may be NULL if another CPU pulls gso_skb off in between
		 * the empty check and taking the lock.
		 */
		if (!skb) {
			if (lock)
				spin_unlock(lock);
			goto validate;
		}

		/* skbs in gso_skb were already validated */
		*validate = false;
		if (xfrm_offload(skb))
			*validate = true;
		/* check the reason for requeuing without the tx lock first */
		txq = skb_get_tx_queue(txq->dev, skb);
		if (!netif_xmit_frozen_or_stopped(txq)) {
			skb = __skb_dequeue(&q->gso_skb);
			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_dec(q, skb);
				qdisc_qstats_cpu_qlen_dec(q);
			} else {
				qdisc_qstats_backlog_dec(q, skb);
				q->q.qlen--;
			}
		} else {
			skb = NULL;
			qdisc_maybe_clear_missed(q, txq);
		}
		if (lock)
			spin_unlock(lock);
		goto trace;
	}
validate:
	*validate = true;

	if ((q->flags & TCQ_F_ONETXQUEUE) &&
	    netif_xmit_frozen_or_stopped(txq)) {
		qdisc_maybe_clear_missed(q, txq);
		return skb;
	}

	skb = qdisc_dequeue_skb_bad_txq(q);
	if (unlikely(skb)) {
		if (skb == SKB_XOFF_MAGIC)
			return NULL;
		goto bulk;
	}
	skb = q->dequeue(q);
	if (skb) {
bulk:
		if (qdisc_may_bulk(q))
			try_bulk_dequeue_skb(q, skb, txq, packets);
		else
			try_bulk_dequeue_skb_slow(q, skb, packets);
	}
trace:
	trace_qdisc_dequeue(q, txq, *packets, skb);
	return skb;
}

/*
 * Transmit possibly several skbs, and handle the return status as
 * required.  Holding the running seqcount guarantees that only one
 * CPU can execute this function.
 *
 * Returns to the caller:
 *				false  - hardware queue frozen, back off
 *				true   - feel free to send more pkts
 */
bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
		     struct net_device *dev, struct netdev_queue *txq,
		     spinlock_t *root_lock, bool validate)
{
	int ret = NETDEV_TX_BUSY;
	bool again = false;

	/* And release qdisc */
	if (root_lock)
		spin_unlock(root_lock);

	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
	if (validate)
		skb = validate_xmit_skb_list(skb, dev, &again);

#ifdef CONFIG_XFRM_OFFLOAD
	if (unlikely(again)) {
		if (root_lock)
			spin_lock(root_lock);

		dev_requeue_skb(skb, q);
		return false;
	}
#endif

	if (likely(skb)) {
		HARD_TX_LOCK(dev, txq, smp_processor_id());
		if (!netif_xmit_frozen_or_stopped(txq))
			skb = dev_hard_start_xmit(skb, dev, txq, &ret);
		else
			qdisc_maybe_clear_missed(q, txq);

		HARD_TX_UNLOCK(dev, txq);
	} else {
		if (root_lock)
			spin_lock(root_lock);
		return true;
	}

	if (root_lock)
		spin_lock(root_lock);

	if (!dev_xmit_complete(ret)) {
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY))
			net_warn_ratelimited("BUG %s code %d qlen %d\n",
					     dev->name, ret, q->q.qlen);

		dev_requeue_skb(skb, q);
		return false;
	}

	return true;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * The running seqcount guarantees that only one CPU can process
 * this qdisc at a time.  qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 * netif_tx_lock serializes accesses to the device driver.
 *
 * qdisc_lock(q) and netif_tx_lock are mutually exclusive:
 * if one is grabbed, the other must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				false - queue is empty or throttled.
 *				true  - queue is not empty.
 */
static inline bool qdisc_restart(struct Qdisc *q, int *packets)
{
	spinlock_t *root_lock = NULL;
	struct netdev_queue *txq;
	struct net_device *dev;
	struct sk_buff *skb;
	bool validate;

	/* Dequeue packet */
	skb = dequeue_skb(q, &validate, packets);
	if (unlikely(!skb))
		return false;

	if (!(q->flags & TCQ_F_NOLOCK))
		root_lock = qdisc_lock(q);

	dev = qdisc_dev(q);
	txq = skb_get_tx_queue(dev, skb);

	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
}

void __qdisc_run(struct Qdisc *q)
{
	int quota = READ_ONCE(dev_tx_weight);
	int packets;

	while (qdisc_restart(q, &packets)) {
		quota -= packets;
		if (quota <= 0) {
			__netif_schedule(q);
			break;
		}
	}
}
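
/* Illustrative note (added, not from the original source): dev_tx_weight is
 * the sysctl-controlled transmit budget derived from net.core.dev_weight
 * (commonly 64 by default).  With that default, __qdisc_run() hands at most
 * about 64 packets to the driver per invocation and then defers the rest to
 * net_tx_action() via __netif_schedule(), so a busy qdisc cannot monopolize
 * the CPU.
 */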

unsigned long dev_trans_start(struct net_device *dev)
{
	unsigned long val, res;
	unsigned int i;

	if (is_vlan_dev(dev))
		dev = vlan_dev_real_dev(dev);
	else if (netif_is_macvlan(dev))
		dev = macvlan_dev_real_dev(dev);
	res = netdev_get_tx_queue(dev, 0)->trans_start;
	for (i = 1; i < dev->num_tx_queues; i++) {
		val = netdev_get_tx_queue(dev, i)->trans_start;
		if (val && time_after(val, res))
			res = val;
	}

	return res;
}
EXPORT_SYMBOL(dev_trans_start);

static void dev_watchdog(struct timer_list *t)
{
	struct net_device *dev = from_timer(dev, t, watchdog_timer);

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_timedout = 0;
			unsigned int i;
			unsigned long trans_start;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				trans_start = txq->trans_start;
				if (netif_xmit_stopped(txq) &&
				    time_after(jiffies, (trans_start +
							 dev->watchdog_timeo))) {
					some_queue_timedout = 1;
					txq->trans_timeout++;
					break;
				}
			}

			if (some_queue_timedout) {
				trace_net_dev_xmit_timeout(dev, i);
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
				       dev->name, netdev_drivername(dev), i);
				dev->netdev_ops->ndo_tx_timeout(dev, i);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->netdev_ops->ndo_tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}
EXPORT_SYMBOL_GPL(__netdev_watchdog_up);

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_up_count);
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_down_count);
		linkwatch_fire_event(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
			struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
	.qdisc_sleeping	=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
	.running	=	SEQCNT_ZERO(noop_qdisc.running),
	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
	.gso_skb = {
		.next = (struct sk_buff *)&noop_qdisc.gso_skb,
		.prev = (struct sk_buff *)&noop_qdisc.gso_skb,
		.qlen = 0,
		.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.gso_skb.lock),
	},
	.skb_bad_txq = {
		.next = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
		.prev = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
		.qlen = 0,
		.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock),
	},
};
EXPORT_SYMBOL(noop_qdisc);

static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	/* register_qdisc() assigns a default of noop_enqueue if unset,
	 * but __dev_queue_xmit() treats noqueue only as such
	 * if this is NULL - so clear it here.
	 */
	qdisc->enqueue = NULL;
	return 0;
}

struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.init		=	noqueue_init,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

static const u8 prio2band[TC_PRIO_MAX + 1] = {
	1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
};
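
/* Illustrative note (added, not from the original source): the table above is
 * indexed by skb->priority & TC_PRIO_MAX in pfifo_fast_enqueue() below.  With
 * the usual TC_PRIO_* values this means, for example, that best-effort
 * traffic (TC_PRIO_BESTEFFORT == 0) lands in band 1, bulk traffic
 * (TC_PRIO_BULK == 2) in the lowest-priority band 2, and interactive/control
 * traffic (TC_PRIO_INTERACTIVE == 6, TC_PRIO_CONTROL == 7) in band 0, which
 * is dequeued first.
 */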

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

/*
 * Private data for a pfifo_fast scheduler containing:
 *	- rings for priority bands
 */
struct pfifo_fast_priv {
	struct skb_array q[PFIFO_FAST_BANDS];
};

static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
					  int band)
{
	return &priv->q[band];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
			      struct sk_buff **to_free)
{
	int band = prio2band[skb->priority & TC_PRIO_MAX];
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct skb_array *q = band2list(priv, band);
	unsigned int pkt_len = qdisc_pkt_len(skb);
	int err;

	err = skb_array_produce(q, skb);

	if (unlikely(err)) {
		if (qdisc_is_percpu_stats(qdisc))
			return qdisc_drop_cpu(skb, qdisc, to_free);
		else
			return qdisc_drop(skb, qdisc, to_free);
	}

	qdisc_update_stats_at_enqueue(qdisc, pkt_len);
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct sk_buff *skb = NULL;
	bool need_retry = true;
	int band;

retry:
	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
		struct skb_array *q = band2list(priv, band);

		if (__skb_array_empty(q))
			continue;

		skb = __skb_array_consume(q);
	}
	if (likely(skb)) {
		qdisc_update_stats_at_dequeue(qdisc, skb);
	} else if (need_retry &&
		   test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
		/* Delay clearing STATE_MISSED until here to reduce the
		 * overhead of the second spin_trylock() in qdisc_run_begin()
		 * and of the __netif_schedule() call in qdisc_run_end().
		 */
		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);

		/* Make sure dequeuing happens after clearing
		 * STATE_MISSED.
		 */
		smp_mb__after_atomic();

		need_retry = false;

		goto retry;
	} else {
		WRITE_ONCE(qdisc->empty, true);
	}

	return skb;
}

static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
{
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct sk_buff *skb = NULL;
	int band;

	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
		struct skb_array *q = band2list(priv, band);

		skb = __skb_array_peek(q);
	}

	return skb;
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
	int i, band;
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);

	for (band = 0; band < PFIFO_FAST_BANDS; band++) {
		struct skb_array *q = band2list(priv, band);
		struct sk_buff *skb;

		/* A NULL ring is possible if the destroy path is taken due to
		 * a failed skb_array_init() in pfifo_fast_init().
		 */
		if (!q->ring.queue)
			continue;

		while ((skb = __skb_array_consume(q)) != NULL)
			kfree_skb(skb);
	}

	if (qdisc_is_percpu_stats(qdisc)) {
		for_each_possible_cpu(i) {
			struct gnet_stats_queue *q;

			q = per_cpu_ptr(qdisc->cpu_qstats, i);
			q->backlog = 0;
			q->qlen = 0;
		}
	}
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	int prio;

	/* guard against zero-length rings */
	if (!qlen)
		return -EINVAL;

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		struct skb_array *q = band2list(priv, prio);
		int err;

		err = skb_array_init(q, qlen, GFP_KERNEL);
		if (err)
			return -ENOMEM;
	}

	/* Can bypass the queue discipline */
	qdisc->flags |= TCQ_F_CAN_BYPASS;
	return 0;
}

static void pfifo_fast_destroy(struct Qdisc *sch)
{
	struct pfifo_fast_priv *priv = qdisc_priv(sch);
	int prio;

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		struct skb_array *q = band2list(priv, prio);

		/* A NULL ring is possible if the destroy path is taken due to
		 * a failed skb_array_init() in pfifo_fast_init().
		 */
		if (!q->ring.queue)
			continue;
		/* Destroy the ring but no need to kfree_skb because a call to
		 * pfifo_fast_reset() has already done that work.
		 */
		ptr_ring_cleanup(&q->ring, NULL);
	}
}

static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch,
					  unsigned int new_len)
{
	struct pfifo_fast_priv *priv = qdisc_priv(sch);
	struct skb_array *bands[PFIFO_FAST_BANDS];
	int prio;

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		struct skb_array *q = band2list(priv, prio);

		bands[prio] = q;
	}

	return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len,
					 GFP_KERNEL);
}

struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	sizeof(struct pfifo_fast_priv),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.peek		=	pfifo_fast_peek,
	.init		=	pfifo_fast_init,
	.destroy	=	pfifo_fast_destroy,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.change_tx_queue_len =  pfifo_fast_change_tx_queue_len,
	.owner		=	THIS_MODULE,
	.static_flags	=	TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
};
EXPORT_SYMBOL(pfifo_fast_ops);
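
/* Illustrative note (added, not from the original source): pfifo_fast is the
 * built-in default pointed to by default_qdisc_ops at the top of this file.
 * Administrators can typically switch the default used for newly created
 * queues with something like "sysctl net.core.default_qdisc=fq" and inspect
 * the result with "tc qdisc show dev <ifname>"; the exact commands depend on
 * the userspace tooling installed.
 */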

static struct lock_class_key qdisc_tx_busylock;
static struct lock_class_key qdisc_running_key;

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  const struct Qdisc_ops *ops,
			  struct netlink_ext_ack *extack)
{
	struct Qdisc *sch;
	unsigned int size = sizeof(*sch) + ops->priv_size;
	int err = -ENOBUFS;
	struct net_device *dev;

	if (!dev_queue) {
		NL_SET_ERR_MSG(extack, "No device queue given");
		err = -EINVAL;
		goto errout;
	}

	dev = dev_queue->dev;
	sch = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue));

	if (!sch)
		goto errout;
	__skb_queue_head_init(&sch->gso_skb);
	__skb_queue_head_init(&sch->skb_bad_txq);
	qdisc_skb_head_init(&sch->q);
	spin_lock_init(&sch->q.lock);

	if (ops->static_flags & TCQ_F_CPUSTATS) {
		sch->cpu_bstats =
			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
		if (!sch->cpu_bstats)
			goto errout1;

		sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
		if (!sch->cpu_qstats) {
			free_percpu(sch->cpu_bstats);
			goto errout1;
		}
	}

	spin_lock_init(&sch->busylock);
	lockdep_set_class(&sch->busylock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);

	/* seqlock has the same scope as busylock, for NOLOCK qdisc */
	spin_lock_init(&sch->seqlock);
	lockdep_set_class(&sch->seqlock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);

	seqcount_init(&sch->running);
	lockdep_set_class(&sch->running,
			  dev->qdisc_running_key ?: &qdisc_running_key);

	sch->ops = ops;
	sch->flags = ops->static_flags;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	sch->empty = true;
	dev_hold(dev);
	refcount_set(&sch->refcnt, 1);

	return sch;
errout1:
	kfree(sch);
errout:
	return ERR_PTR(err);
}

struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
				const struct Qdisc_ops *ops,
				unsigned int parentid,
				struct netlink_ext_ack *extack)
{
	struct Qdisc *sch;

	if (!try_module_get(ops->owner)) {
		NL_SET_ERR_MSG(extack, "Failed to increase module reference counter");
		return NULL;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		module_put(ops->owner);
		return NULL;
	}
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL, extack) == 0) {
		trace_qdisc_create(ops, dev_queue->dev, parentid);
		return sch;
	}

	qdisc_put(sch);
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);
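
/* Illustrative note (added, not from the original source): a typical caller
 * looks like attach_one_default_qdisc() further down in this file, e.g.
 *
 *	qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, TC_H_ROOT, NULL);
 *	if (!qdisc)
 *		return;
 *
 * i.e. the function returns NULL (not an ERR_PTR) on failure and already
 * holds a module reference and a qdisc refcount on success.
 */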

/* Under qdisc_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;
	struct sk_buff *skb, *tmp;

	trace_qdisc_reset(qdisc);

	if (ops->reset)
		ops->reset(qdisc);

	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
		__skb_unlink(skb, &qdisc->gso_skb);
		kfree_skb_list(skb);
	}

	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
		__skb_unlink(skb, &qdisc->skb_bad_txq);
		kfree_skb_list(skb);
	}

	qdisc->q.qlen = 0;
	qdisc->qstats.backlog = 0;
}
EXPORT_SYMBOL(qdisc_reset);

void qdisc_free(struct Qdisc *qdisc)
{
	if (qdisc_is_percpu_stats(qdisc)) {
		free_percpu(qdisc->cpu_bstats);
		free_percpu(qdisc->cpu_qstats);
	}

	kfree(qdisc);
}

static void qdisc_free_cb(struct rcu_head *head)
{
	struct Qdisc *q = container_of(head, struct Qdisc, rcu);

	qdisc_free(q);
}

static void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

#ifdef CONFIG_NET_SCHED
	qdisc_hash_del(qdisc);

	qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
	gen_kill_estimator(&qdisc->rate_est);

	qdisc_reset(qdisc);

	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	trace_qdisc_destroy(qdisc);

	call_rcu(&qdisc->rcu, qdisc_free_cb);
}

void qdisc_put(struct Qdisc *qdisc)
{
	if (!qdisc)
		return;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !refcount_dec_and_test(&qdisc->refcnt))
		return;

	qdisc_destroy(qdisc);
}
EXPORT_SYMBOL(qdisc_put);

/* Version of qdisc_put() that is called with the rtnl mutex unlocked.
 * Intended as an optimization: this function only takes the rtnl lock if
 * the qdisc reference counter reaches zero.
 */

void qdisc_put_unlocked(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !refcount_dec_and_rtnl_lock(&qdisc->refcnt))
		return;

	qdisc_destroy(qdisc);
	rtnl_unlock();
}
EXPORT_SYMBOL(qdisc_put_unlocked);

/* Attach toplevel qdisc to device queue. */
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}
EXPORT_SYMBOL(dev_graft_qdisc);

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_put(qdisc);
	}
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;
	const struct Qdisc_ops *ops = default_qdisc_ops;

	if (dev->priv_flags & IFF_NO_QUEUE)
		ops = &noqueue_qdisc_ops;
	else if (dev->type == ARPHRD_CAN)
		ops = &pfifo_fast_ops;

	qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
	if (!qdisc)
		return;

	if (!netif_is_multiqueue(dev))
		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
	dev_queue->qdisc_sleeping = qdisc;
}

static void attach_default_qdiscs(struct net_device *dev)
{
	struct netdev_queue *txq;
	struct Qdisc *qdisc;

	txq = netdev_get_tx_queue(dev, 0);

	if (!netif_is_multiqueue(dev) ||
	    dev->priv_flags & IFF_NO_QUEUE) {
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
		qdisc = txq->qdisc_sleeping;
		rcu_assign_pointer(dev->qdisc, qdisc);
		qdisc_refcount_inc(qdisc);
	} else {
		qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
		if (qdisc) {
			rcu_assign_pointer(dev->qdisc, qdisc);
			qdisc->ops->attach(qdisc);
		}
	}
	qdisc = rtnl_dereference(dev->qdisc);

	/* Detect whether default qdisc setup/init failed and fall back to "noqueue" */
	if (qdisc == &noop_qdisc) {
		netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
			    default_qdisc_ops->id, noqueue_qdisc_ops.id);
		netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
		dev->priv_flags |= IFF_NO_QUEUE;
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
		qdisc = txq->qdisc_sleeping;
		rcu_assign_pointer(dev->qdisc, qdisc);
		qdisc_refcount_inc(qdisc);
		dev->priv_flags ^= IFF_NO_QUEUE;
	}

#ifdef CONFIG_NET_SCHED
	if (qdisc != &noop_qdisc)
		qdisc_hash_add(qdisc, false);
#endif
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p) {
		dev_queue->trans_start = 0;
		*need_watchdog_p = 1;
	}
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to the device yet;
	 * create a default one for devices which need queueing
	 * and the noqueue qdisc for virtual interfaces.
	 */

	if (rtnl_dereference(dev->qdisc) == &noop_qdisc)
		attach_default_qdiscs(dev);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	if (dev_ingress_queue(dev))
		transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);

	if (need_watchdog) {
		netif_trans_update(dev);
		dev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(dev_activate);

static void qdisc_deactivate(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN)
		return;

	set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = rtnl_dereference(dev_queue->qdisc);
	if (qdisc) {
		qdisc_deactivate(qdisc);
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
	}
}

static void dev_reset_queue(struct net_device *dev,
			    struct netdev_queue *dev_queue,
			    void *_unused)
{
	struct Qdisc *qdisc;
	bool nolock;

	qdisc = dev_queue->qdisc_sleeping;
	if (!qdisc)
		return;

	nolock = qdisc->flags & TCQ_F_NOLOCK;

	if (nolock)
		spin_lock_bh(&qdisc->seqlock);
	spin_lock_bh(qdisc_lock(qdisc));

	qdisc_reset(qdisc);

	spin_unlock_bh(qdisc_lock(qdisc));
	if (nolock) {
		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
		spin_unlock_bh(&qdisc->seqlock);
	}
}

static bool some_qdisc_is_busy(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc_sleeping;

		root_lock = qdisc_lock(q);
		spin_lock_bh(root_lock);

		val = (qdisc_is_running(q) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));

		spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

/**
 *	dev_deactivate_many - deactivate transmissions on several devices
 *	@head: list of devices to deactivate
 *
 *	This function returns only when all outstanding transmissions
 *	have completed, unless all devices are in dismantle phase.
 */
void dev_deactivate_many(struct list_head *head)
{
	struct net_device *dev;

	list_for_each_entry(dev, head, close_list) {
		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
					 &noop_qdisc);
		if (dev_ingress_queue(dev))
			dev_deactivate_queue(dev, dev_ingress_queue(dev),
					     &noop_qdisc);

		dev_watchdog_down(dev);
	}

	/* Wait for outstanding qdisc-less dev_queue_xmit calls or
	 * outstanding qdisc enqueuing calls.
	 * This is avoided if all devices are in dismantle phase:
	 * the caller will call synchronize_net() for us.
	 */
	synchronize_net();

	list_for_each_entry(dev, head, close_list) {
		netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);

		if (dev_ingress_queue(dev))
			dev_reset_queue(dev, dev_ingress_queue(dev), NULL);
	}

	/* Wait for outstanding qdisc_run calls. */
	list_for_each_entry(dev, head, close_list) {
		while (some_qdisc_is_busy(dev)) {
			/* wait_event() would avoid this sleep-loop but would
			 * require expensive checks in the fast paths of packet
			 * processing, which isn't worth it.
			 */
			schedule_timeout_uninterruptible(1);
		}
	}
}

void dev_deactivate(struct net_device *dev)
{
	LIST_HEAD(single);

	list_add(&dev->close_list, &single);
	dev_deactivate_many(&single);
	list_del(&single);
}
EXPORT_SYMBOL(dev_deactivate);

static int qdisc_change_tx_queue_len(struct net_device *dev,
				     struct netdev_queue *dev_queue)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->change_tx_queue_len)
		return ops->change_tx_queue_len(qdisc, dev->tx_queue_len);
	return 0;
}

void dev_qdisc_change_real_num_tx(struct net_device *dev,
				  unsigned int new_real_tx)
{
	struct Qdisc *qdisc = rtnl_dereference(dev->qdisc);

	if (qdisc->ops->change_real_num_tx)
		qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
}

int dev_qdisc_change_tx_queue_len(struct net_device *dev)
{
	bool up = dev->flags & IFF_UP;
	unsigned int i;
	int ret = 0;

	if (up)
		dev_deactivate(dev);

	for (i = 0; i < dev->num_tx_queues; i++) {
		ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]);

		/* TODO: revert changes on a partial failure */
		if (ret)
			break;
	}

	if (up)
		dev_activate(dev);
	return ret;
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	rcu_assign_pointer(dev_queue->qdisc, qdisc);
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
	rcu_assign_pointer(dev->qdisc, &noop_qdisc);
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	if (dev_ingress_queue(dev))
		dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);

	timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	if (dev_ingress_queue(dev))
		shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
	qdisc_put(rtnl_dereference(dev->qdisc));
	rcu_assign_pointer(dev->qdisc, &noop_qdisc);

	WARN_ON(timer_pending(&dev->watchdog_timer));
}

void psched_ratecfg_precompute(struct psched_ratecfg *r,
			       const struct tc_ratespec *conf,
			       u64 rate64)
{
	memset(r, 0, sizeof(*r));
	r->overhead = conf->overhead;
	r->mpu = conf->mpu;
	r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
	r->mult = 1;
	/*
	 * The idea here is to replace a divide in the fast path with a
	 * reciprocal one (a reciprocal divide is a multiply and a shift).
	 *
	 * The normal formula would be:
	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
	 *
	 * Instead we compute mult/shift to use:
	 *  time_in_ns = (len * mult) >> shift;
	 *
	 * We try to get the highest possible mult value for accuracy,
	 * but have to make sure no overflows will ever happen.
	 */
	if (r->rate_bytes_ps > 0) {
		u64 factor = NSEC_PER_SEC;

		for (;;) {
			r->mult = div64_u64(factor, r->rate_bytes_ps);
			if (r->mult & (1U << 31) || factor & (1ULL << 63))
				break;
			factor <<= 1;
			r->shift++;
		}
	}
}
EXPORT_SYMBOL(psched_ratecfg_precompute);
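
/* Illustrative worked example (added, not from the original source): for a
 * 1 Gbit/s rate, rate_bytes_ps is 125,000,000, so the exact cost is
 * 1e9 / 125e6 = 8 ns per byte.  The loop above starts with mult = 8 and keeps
 * doubling factor (incrementing shift) until mult reaches 2^31, ending with
 * mult = 2^31 and shift = 28, so that (len * mult) >> shift is again
 * len * 8 ns, but computed without a 64-bit division in the fast path.
 */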

static void mini_qdisc_rcu_func(struct rcu_head *head)
{
}

void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
			  struct tcf_proto *tp_head)
{
	/* Protected with chain0->filter_chain_lock.
	 * Can't access chain directly because tp_head can be NULL.
	 */
	struct mini_Qdisc *miniq_old =
		rcu_dereference_protected(*miniqp->p_miniq, 1);
	struct mini_Qdisc *miniq;

	if (!tp_head) {
		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
		/* Wait for any in-flight RCU callback before it is freed. */
		rcu_barrier();
		return;
	}

	miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
		&miniqp->miniq1 : &miniqp->miniq2;

	/* We need to make sure that readers won't see the miniq
	 * we are about to modify.  So wait until the previous call_rcu
	 * callback is done.
	 */
	rcu_barrier();
	miniq->filter_list = tp_head;
	rcu_assign_pointer(*miniqp->p_miniq, miniq);

	if (miniq_old)
		/* This is the counterpart of the rcu barriers above.  We need
		 * to block potential new users of miniq_old until all readers
		 * have stopped seeing it.
		 */
		call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
}
EXPORT_SYMBOL(mini_qdisc_pair_swap);

void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
				struct tcf_block *block)
{
	miniqp->miniq1.block = block;
	miniqp->miniq2.block = block;
}
EXPORT_SYMBOL(mini_qdisc_pair_block_init);

void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
			  struct mini_Qdisc __rcu **p_miniq)
{
	miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
	miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
	miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
	miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
	miniqp->p_miniq = p_miniq;
}
EXPORT_SYMBOL(mini_qdisc_pair_init);