xref: /kernel/linux/linux-5.10/drivers/net/ifb.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
/* drivers/net/ifb.c:

	The purpose of this driver is to provide a device that allows
	for sharing of resources:

	1) qdiscs/policies that are per device as opposed to system wide.
	ifb provides a device that traffic can be redirected to, thus
	giving an impression of sharing.

	2) Allows for queueing incoming traffic for shaping instead of
	dropping.

	The original concept is based on what is known as the IMQ
	driver, initially written by Martin Devera, later rewritten
	by Patrick McHardy and then maintained by Andre Correa.

	You need the tc action mirror or redirect to feed packets to
	this device.


	Authors:	Jamal Hadi Salim (2005)

*/
25
26
27#include <linux/module.h>
28#include <linux/kernel.h>
29#include <linux/netdevice.h>
30#include <linux/etherdevice.h>
31#include <linux/init.h>
32#include <linux/interrupt.h>
33#include <linux/moduleparam.h>
34#include <net/pkt_sched.h>
35#include <net/net_namespace.h>
36
37#define TX_Q_LIMIT    32
38struct ifb_q_private {
39	struct net_device	*dev;
40	struct tasklet_struct   ifb_tasklet;
41	int			tasklet_pending;
42	int			txqnum;
43	struct sk_buff_head     rq;
44	u64			rx_packets;
45	u64			rx_bytes;
46	struct u64_stats_sync	rsync;
47
48	struct u64_stats_sync	tsync;
49	u64			tx_packets;
50	u64			tx_bytes;
51	struct sk_buff_head     tq;
52} ____cacheline_aligned_in_smp;
53
/*
 * Private area of an ifb net_device.
 *
 * @tx_private points at an array with one ifb_q_private per TX queue,
 * allocated in ifb_dev_init() and released in ifb_dev_free().
 */
struct ifb_dev_private {
	struct ifb_q_private	*tx_private;
};
57
58static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
59static int ifb_open(struct net_device *dev);
60static int ifb_close(struct net_device *dev);
61
62static void ifb_ri_tasklet(unsigned long _txp)
63{
64	struct ifb_q_private *txp = (struct ifb_q_private *)_txp;
65	struct netdev_queue *txq;
66	struct sk_buff *skb;
67
68	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
69	skb = skb_peek(&txp->tq);
70	if (!skb) {
71		if (!__netif_tx_trylock(txq))
72			goto resched;
73		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
74		__netif_tx_unlock(txq);
75	}
76
77	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
78		skb->redirected = 0;
79#ifdef CONFIG_NET_CLS_ACT
80		skb->tc_skip_classify = 1;
81#endif
82
83		u64_stats_update_begin(&txp->tsync);
84		txp->tx_packets++;
85		txp->tx_bytes += skb->len;
86		u64_stats_update_end(&txp->tsync);
87
88		rcu_read_lock();
89		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
90		if (!skb->dev) {
91			rcu_read_unlock();
92			dev_kfree_skb(skb);
93			txp->dev->stats.tx_dropped++;
94			if (skb_queue_len(&txp->tq) != 0)
95				goto resched;
96			break;
97		}
98		rcu_read_unlock();
99		skb->skb_iif = txp->dev->ifindex;
100
101		if (!skb->from_ingress) {
102			dev_queue_xmit(skb);
103		} else {
104			skb_pull_rcsum(skb, skb->mac_len);
105			netif_receive_skb(skb);
106		}
107	}
108
109	if (__netif_tx_trylock(txq)) {
110		skb = skb_peek(&txp->rq);
111		if (!skb) {
112			txp->tasklet_pending = 0;
113			if (netif_tx_queue_stopped(txq))
114				netif_tx_wake_queue(txq);
115		} else {
116			__netif_tx_unlock(txq);
117			goto resched;
118		}
119		__netif_tx_unlock(txq);
120	} else {
121resched:
122		txp->tasklet_pending = 1;
123		tasklet_schedule(&txp->ifb_tasklet);
124	}
125
126}
127
128static void ifb_stats64(struct net_device *dev,
129			struct rtnl_link_stats64 *stats)
130{
131	struct ifb_dev_private *dp = netdev_priv(dev);
132	struct ifb_q_private *txp = dp->tx_private;
133	unsigned int start;
134	u64 packets, bytes;
135	int i;
136
137	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
138		do {
139			start = u64_stats_fetch_begin_irq(&txp->rsync);
140			packets = txp->rx_packets;
141			bytes = txp->rx_bytes;
142		} while (u64_stats_fetch_retry_irq(&txp->rsync, start));
143		stats->rx_packets += packets;
144		stats->rx_bytes += bytes;
145
146		do {
147			start = u64_stats_fetch_begin_irq(&txp->tsync);
148			packets = txp->tx_packets;
149			bytes = txp->tx_bytes;
150		} while (u64_stats_fetch_retry_irq(&txp->tsync, start));
151		stats->tx_packets += packets;
152		stats->tx_bytes += bytes;
153	}
154	stats->rx_dropped = dev->stats.rx_dropped;
155	stats->tx_dropped = dev->stats.tx_dropped;
156}
157
158static int ifb_dev_init(struct net_device *dev)
159{
160	struct ifb_dev_private *dp = netdev_priv(dev);
161	struct ifb_q_private *txp;
162	int i;
163
164	txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL);
165	if (!txp)
166		return -ENOMEM;
167	dp->tx_private = txp;
168	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
169		txp->txqnum = i;
170		txp->dev = dev;
171		__skb_queue_head_init(&txp->rq);
172		__skb_queue_head_init(&txp->tq);
173		u64_stats_init(&txp->rsync);
174		u64_stats_init(&txp->tsync);
175		tasklet_init(&txp->ifb_tasklet, ifb_ri_tasklet,
176			     (unsigned long)txp);
177		netif_tx_start_queue(netdev_get_tx_queue(dev, i));
178	}
179	return 0;
180}
181
182static const struct net_device_ops ifb_netdev_ops = {
183	.ndo_open	= ifb_open,
184	.ndo_stop	= ifb_close,
185	.ndo_get_stats64 = ifb_stats64,
186	.ndo_start_xmit	= ifb_xmit,
187	.ndo_validate_addr = eth_validate_addr,
188	.ndo_init	= ifb_dev_init,
189};
190
/*
 * Feature bits that ifb_setup() ORs into dev->features and, with the two
 * VLAN tag insertion bits masked out, into dev->vlan_features.
 */
#define IFB_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |	\
		      NETIF_F_HW_CSUM |					\
		      NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN |	\
		      NETIF_F_GSO_ENCAP_ALL |				\
		      NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX)
196
197static void ifb_dev_free(struct net_device *dev)
198{
199	struct ifb_dev_private *dp = netdev_priv(dev);
200	struct ifb_q_private *txp = dp->tx_private;
201	int i;
202
203	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
204		tasklet_kill(&txp->ifb_tasklet);
205		__skb_queue_purge(&txp->rq);
206		__skb_queue_purge(&txp->tq);
207	}
208	kfree(dp->tx_private);
209}
210
211static void ifb_setup(struct net_device *dev)
212{
213	/* Initialize the device structure. */
214	dev->netdev_ops = &ifb_netdev_ops;
215
216	/* Fill in device structure with ethernet-generic values. */
217	ether_setup(dev);
218	dev->tx_queue_len = TX_Q_LIMIT;
219
220	dev->features |= IFB_FEATURES;
221	dev->hw_features |= dev->features;
222	dev->hw_enc_features |= dev->features;
223	dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX |
224					       NETIF_F_HW_VLAN_STAG_TX);
225
226	dev->flags |= IFF_NOARP;
227	dev->flags &= ~IFF_MULTICAST;
228	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
229	netif_keep_dst(dev);
230	eth_hw_addr_random(dev);
231	dev->needs_free_netdev = true;
232	dev->priv_destructor = ifb_dev_free;
233
234	dev->min_mtu = 0;
235	dev->max_mtu = 0;
236}
237
238static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
239{
240	struct ifb_dev_private *dp = netdev_priv(dev);
241	struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
242
243	u64_stats_update_begin(&txp->rsync);
244	txp->rx_packets++;
245	txp->rx_bytes += skb->len;
246	u64_stats_update_end(&txp->rsync);
247
248	if (!skb->redirected || !skb->skb_iif) {
249		dev_kfree_skb(skb);
250		dev->stats.rx_dropped++;
251		return NETDEV_TX_OK;
252	}
253
254	if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
255		netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));
256
257	__skb_queue_tail(&txp->rq, skb);
258	if (!txp->tasklet_pending) {
259		txp->tasklet_pending = 1;
260		tasklet_schedule(&txp->ifb_tasklet);
261	}
262
263	return NETDEV_TX_OK;
264}
265
/*
 * ndo_open handler: start all TX queues of @dev.
 *
 * Always returns 0.
 */
static int ifb_open(struct net_device *dev)
{
	netif_tx_start_all_queues(dev);
	return 0;
}

/*
 * ndo_stop handler: stop all TX queues of @dev.
 *
 * Always returns 0.
 */
static int ifb_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	return 0;
}
277
278static int ifb_validate(struct nlattr *tb[], struct nlattr *data[],
279			struct netlink_ext_ack *extack)
280{
281	if (tb[IFLA_ADDRESS]) {
282		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
283			return -EINVAL;
284		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
285			return -EADDRNOTAVAIL;
286	}
287	return 0;
288}
289
290static struct rtnl_link_ops ifb_link_ops __read_mostly = {
291	.kind		= "ifb",
292	.priv_size	= sizeof(struct ifb_dev_private),
293	.setup		= ifb_setup,
294	.validate	= ifb_validate,
295};
296
297/* Number of ifb devices to be set up by this module.
298 * Note that these legacy devices have one queue.
299 * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
300 */
301static int numifbs = 2;
302module_param(numifbs, int, 0);
303MODULE_PARM_DESC(numifbs, "Number of ifb devices");
304
305static int __init ifb_init_one(int index)
306{
307	struct net_device *dev_ifb;
308	int err;
309
310	dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
311			       NET_NAME_UNKNOWN, ifb_setup);
312
313	if (!dev_ifb)
314		return -ENOMEM;
315
316	dev_ifb->rtnl_link_ops = &ifb_link_ops;
317	err = register_netdevice(dev_ifb);
318	if (err < 0)
319		goto err;
320
321	return 0;
322
323err:
324	free_netdev(dev_ifb);
325	return err;
326}
327
328static int __init ifb_init_module(void)
329{
330	int i, err;
331
332	down_write(&pernet_ops_rwsem);
333	rtnl_lock();
334	err = __rtnl_link_register(&ifb_link_ops);
335	if (err < 0)
336		goto out;
337
338	for (i = 0; i < numifbs && !err; i++) {
339		err = ifb_init_one(i);
340		cond_resched();
341	}
342	if (err)
343		__rtnl_link_unregister(&ifb_link_ops);
344
345out:
346	rtnl_unlock();
347	up_write(&pernet_ops_rwsem);
348
349	return err;
350}
351
352static void __exit ifb_cleanup_module(void)
353{
354	rtnl_link_unregister(&ifb_link_ops);
355}
356
357module_init(ifb_init_module);
358module_exit(ifb_cleanup_module);
359MODULE_LICENSE("GPL");
360MODULE_AUTHOR("Jamal Hadi Salim");
361MODULE_ALIAS_RTNL_LINK("ifb");
362