1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * drivers/net/veth.c 4 * 5 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 6 * 7 * Author: Pavel Emelianov <xemul@openvz.org> 8 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 9 * 10 */ 11 12#include <linux/netdevice.h> 13#include <linux/slab.h> 14#include <linux/ethtool.h> 15#include <linux/etherdevice.h> 16#include <linux/u64_stats_sync.h> 17 18#include <net/rtnetlink.h> 19#include <net/dst.h> 20#include <net/xfrm.h> 21#include <net/xdp.h> 22#include <linux/veth.h> 23#include <linux/module.h> 24#include <linux/bpf.h> 25#include <linux/filter.h> 26#include <linux/ptr_ring.h> 27#include <linux/bpf_trace.h> 28#include <linux/net_tstamp.h> 29 30#define DRV_NAME "veth" 31#define DRV_VERSION "1.0" 32 33#define VETH_XDP_FLAG BIT(0) 34#define VETH_RING_SIZE 256 35#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) 36 37#define VETH_XDP_TX_BULK_SIZE 16 38 39struct veth_stats { 40 u64 rx_drops; 41 /* xdp */ 42 u64 xdp_packets; 43 u64 xdp_bytes; 44 u64 xdp_redirect; 45 u64 xdp_drops; 46 u64 xdp_tx; 47 u64 xdp_tx_err; 48 u64 peer_tq_xdp_xmit; 49 u64 peer_tq_xdp_xmit_err; 50}; 51 52struct veth_rq_stats { 53 struct veth_stats vs; 54 struct u64_stats_sync syncp; 55}; 56 57struct veth_rq { 58 struct napi_struct xdp_napi; 59 struct net_device *dev; 60 struct bpf_prog __rcu *xdp_prog; 61 struct xdp_mem_info xdp_mem; 62 struct veth_rq_stats stats; 63 bool rx_notify_masked; 64 struct ptr_ring xdp_ring; 65 struct xdp_rxq_info xdp_rxq; 66}; 67 68struct veth_priv { 69 struct net_device __rcu *peer; 70 atomic64_t dropped; 71 struct bpf_prog *_xdp_prog; 72 struct veth_rq *rq; 73 unsigned int requested_headroom; 74}; 75 76struct veth_xdp_tx_bq { 77 struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE]; 78 unsigned int count; 79}; 80 81/* 82 * ethtool interface 83 */ 84 85struct veth_q_stat_desc { 86 char desc[ETH_GSTRING_LEN]; 87 size_t offset; 88}; 89 90#define VETH_RQ_STAT(m) offsetof(struct veth_stats, m) 91 92static const struct veth_q_stat_desc veth_rq_stats_desc[] = { 93 { "xdp_packets", VETH_RQ_STAT(xdp_packets) }, 94 { "xdp_bytes", VETH_RQ_STAT(xdp_bytes) }, 95 { "drops", VETH_RQ_STAT(rx_drops) }, 96 { "xdp_redirect", VETH_RQ_STAT(xdp_redirect) }, 97 { "xdp_drops", VETH_RQ_STAT(xdp_drops) }, 98 { "xdp_tx", VETH_RQ_STAT(xdp_tx) }, 99 { "xdp_tx_errors", VETH_RQ_STAT(xdp_tx_err) }, 100}; 101 102#define VETH_RQ_STATS_LEN ARRAY_SIZE(veth_rq_stats_desc) 103 104static const struct veth_q_stat_desc veth_tq_stats_desc[] = { 105 { "xdp_xmit", VETH_RQ_STAT(peer_tq_xdp_xmit) }, 106 { "xdp_xmit_errors", VETH_RQ_STAT(peer_tq_xdp_xmit_err) }, 107}; 108 109#define VETH_TQ_STATS_LEN ARRAY_SIZE(veth_tq_stats_desc) 110 111static struct { 112 const char string[ETH_GSTRING_LEN]; 113} ethtool_stats_keys[] = { 114 { "peer_ifindex" }, 115}; 116 117static int veth_get_link_ksettings(struct net_device *dev, 118 struct ethtool_link_ksettings *cmd) 119{ 120 cmd->base.speed = SPEED_10000; 121 cmd->base.duplex = DUPLEX_FULL; 122 cmd->base.port = PORT_TP; 123 cmd->base.autoneg = AUTONEG_DISABLE; 124 return 0; 125} 126 127static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 128{ 129 strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); 130 strlcpy(info->version, DRV_VERSION, sizeof(info->version)); 131} 132 133static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 134{ 135 char *p = (char *)buf; 136 int i, j; 137 138 switch(stringset) { 139 case ETH_SS_STATS: 140 memcpy(p, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 141 p += sizeof(ethtool_stats_keys); 142 for (i = 0; i < dev->real_num_rx_queues; i++) { 143 for (j = 0; j < VETH_RQ_STATS_LEN; j++) { 144 snprintf(p, ETH_GSTRING_LEN, 145 "rx_queue_%u_%.18s", 146 i, veth_rq_stats_desc[j].desc); 147 p += ETH_GSTRING_LEN; 148 } 149 } 150 for (i = 0; i < dev->real_num_tx_queues; i++) { 151 for (j = 0; j < VETH_TQ_STATS_LEN; j++) { 152 snprintf(p, ETH_GSTRING_LEN, 153 "tx_queue_%u_%.18s", 154 i, veth_tq_stats_desc[j].desc); 155 p += ETH_GSTRING_LEN; 156 } 157 } 158 break; 159 } 160} 161 162static int veth_get_sset_count(struct net_device *dev, int sset) 163{ 164 switch (sset) { 165 case ETH_SS_STATS: 166 return ARRAY_SIZE(ethtool_stats_keys) + 167 VETH_RQ_STATS_LEN * dev->real_num_rx_queues + 168 VETH_TQ_STATS_LEN * dev->real_num_tx_queues; 169 default: 170 return -EOPNOTSUPP; 171 } 172} 173 174static void veth_get_ethtool_stats(struct net_device *dev, 175 struct ethtool_stats *stats, u64 *data) 176{ 177 struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 178 struct net_device *peer = rtnl_dereference(priv->peer); 179 int i, j, idx; 180 181 data[0] = peer ? peer->ifindex : 0; 182 idx = 1; 183 for (i = 0; i < dev->real_num_rx_queues; i++) { 184 const struct veth_rq_stats *rq_stats = &priv->rq[i].stats; 185 const void *stats_base = (void *)&rq_stats->vs; 186 unsigned int start; 187 size_t offset; 188 189 do { 190 start = u64_stats_fetch_begin_irq(&rq_stats->syncp); 191 for (j = 0; j < VETH_RQ_STATS_LEN; j++) { 192 offset = veth_rq_stats_desc[j].offset; 193 data[idx + j] = *(u64 *)(stats_base + offset); 194 } 195 } while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start)); 196 idx += VETH_RQ_STATS_LEN; 197 } 198 199 if (!peer) 200 return; 201 202 rcv_priv = netdev_priv(peer); 203 for (i = 0; i < peer->real_num_rx_queues; i++) { 204 const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats; 205 const void *base = (void *)&rq_stats->vs; 206 unsigned int start, tx_idx = idx; 207 size_t offset; 208 209 tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN; 210 do { 211 start = u64_stats_fetch_begin_irq(&rq_stats->syncp); 212 for (j = 0; j < VETH_TQ_STATS_LEN; j++) { 213 offset = veth_tq_stats_desc[j].offset; 214 data[tx_idx + j] += *(u64 *)(base + offset); 215 } 216 } while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start)); 217 } 218} 219 220static const struct ethtool_ops veth_ethtool_ops = { 221 .get_drvinfo = veth_get_drvinfo, 222 .get_link = ethtool_op_get_link, 223 .get_strings = veth_get_strings, 224 .get_sset_count = veth_get_sset_count, 225 .get_ethtool_stats = veth_get_ethtool_stats, 226 .get_link_ksettings = veth_get_link_ksettings, 227 .get_ts_info = ethtool_op_get_ts_info, 228}; 229 230/* general routines */ 231 232static bool veth_is_xdp_frame(void *ptr) 233{ 234 return (unsigned long)ptr & VETH_XDP_FLAG; 235} 236 237static struct xdp_frame *veth_ptr_to_xdp(void *ptr) 238{ 239 return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG); 240} 241 242static void *veth_xdp_to_ptr(struct xdp_frame *xdp) 243{ 244 return (void *)((unsigned long)xdp | VETH_XDP_FLAG); 245} 246 247static void veth_ptr_free(void *ptr) 248{ 249 if (veth_is_xdp_frame(ptr)) 250 xdp_return_frame(veth_ptr_to_xdp(ptr)); 251 else 252 kfree_skb(ptr); 253} 254 255static void __veth_xdp_flush(struct veth_rq *rq) 256{ 257 /* Write ptr_ring before reading rx_notify_masked */ 258 smp_mb(); 259 if (!rq->rx_notify_masked) { 260 rq->rx_notify_masked = true; 261 napi_schedule(&rq->xdp_napi); 262 } 263} 264 265static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb) 266{ 267 if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) { 268 dev_kfree_skb_any(skb); 269 return NET_RX_DROP; 270 } 271 272 return NET_RX_SUCCESS; 273} 274 275static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, 276 struct veth_rq *rq, bool xdp) 277{ 278 return __dev_forward_skb(dev, skb) ?: xdp ? 279 veth_xdp_rx(rq, skb) : 280 netif_rx(skb); 281} 282 283static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 284{ 285 struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 286 struct veth_rq *rq = NULL; 287 int ret = NETDEV_TX_OK; 288 struct net_device *rcv; 289 int length = skb->len; 290 bool rcv_xdp = false; 291 int rxq; 292 293 rcu_read_lock(); 294 rcv = rcu_dereference(priv->peer); 295 if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { 296 kfree_skb(skb); 297 goto drop; 298 } 299 300 rcv_priv = netdev_priv(rcv); 301 rxq = skb_get_queue_mapping(skb); 302 if (rxq < rcv->real_num_rx_queues) { 303 rq = &rcv_priv->rq[rxq]; 304 rcv_xdp = rcu_access_pointer(rq->xdp_prog); 305 } 306 307 skb_tx_timestamp(skb); 308 if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) { 309 if (!rcv_xdp) 310 dev_lstats_add(dev, length); 311 } else { 312drop: 313 atomic64_inc(&priv->dropped); 314 ret = NET_XMIT_DROP; 315 } 316 317 if (rcv_xdp) 318 __veth_xdp_flush(rq); 319 320 rcu_read_unlock(); 321 322 return ret; 323} 324 325static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) 326{ 327 struct veth_priv *priv = netdev_priv(dev); 328 329 dev_lstats_read(dev, packets, bytes); 330 return atomic64_read(&priv->dropped); 331} 332 333static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) 334{ 335 struct veth_priv *priv = netdev_priv(dev); 336 int i; 337 338 result->peer_tq_xdp_xmit_err = 0; 339 result->xdp_packets = 0; 340 result->xdp_tx_err = 0; 341 result->xdp_bytes = 0; 342 result->rx_drops = 0; 343 for (i = 0; i < dev->num_rx_queues; i++) { 344 u64 packets, bytes, drops, xdp_tx_err, peer_tq_xdp_xmit_err; 345 struct veth_rq_stats *stats = &priv->rq[i].stats; 346 unsigned int start; 347 348 do { 349 start = u64_stats_fetch_begin_irq(&stats->syncp); 350 peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err; 351 xdp_tx_err = stats->vs.xdp_tx_err; 352 packets = stats->vs.xdp_packets; 353 bytes = stats->vs.xdp_bytes; 354 drops = stats->vs.rx_drops; 355 } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 356 result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err; 357 result->xdp_tx_err += xdp_tx_err; 358 result->xdp_packets += packets; 359 result->xdp_bytes += bytes; 360 result->rx_drops += drops; 361 } 362} 363 364static void veth_get_stats64(struct net_device *dev, 365 struct rtnl_link_stats64 *tot) 366{ 367 struct veth_priv *priv = netdev_priv(dev); 368 struct net_device *peer; 369 struct veth_stats rx; 370 u64 packets, bytes; 371 372 tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); 373 tot->tx_bytes = bytes; 374 tot->tx_packets = packets; 375 376 veth_stats_rx(&rx, dev); 377 tot->tx_dropped += rx.xdp_tx_err; 378 tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; 379 tot->rx_bytes = rx.xdp_bytes; 380 tot->rx_packets = rx.xdp_packets; 381 382 rcu_read_lock(); 383 peer = rcu_dereference(priv->peer); 384 if (peer) { 385 veth_stats_tx(peer, &packets, &bytes); 386 tot->rx_bytes += bytes; 387 tot->rx_packets += packets; 388 389 veth_stats_rx(&rx, peer); 390 tot->tx_dropped += rx.peer_tq_xdp_xmit_err; 391 tot->rx_dropped += rx.xdp_tx_err; 392 tot->tx_bytes += rx.xdp_bytes; 393 tot->tx_packets += rx.xdp_packets; 394 } 395 rcu_read_unlock(); 396} 397 398/* fake multicast ability */ 399static void veth_set_multicast_list(struct net_device *dev) 400{ 401} 402 403static struct sk_buff *veth_build_skb(void *head, int headroom, int len, 404 int buflen) 405{ 406 struct sk_buff *skb; 407 408 skb = build_skb(head, buflen); 409 if (!skb) 410 return NULL; 411 412 skb_reserve(skb, headroom); 413 skb_put(skb, len); 414 415 return skb; 416} 417 418static int veth_select_rxq(struct net_device *dev) 419{ 420 return smp_processor_id() % dev->real_num_rx_queues; 421} 422 423static struct net_device *veth_peer_dev(struct net_device *dev) 424{ 425 struct veth_priv *priv = netdev_priv(dev); 426 427 /* Callers must be under RCU read side. */ 428 return rcu_dereference(priv->peer); 429} 430 431static int veth_xdp_xmit(struct net_device *dev, int n, 432 struct xdp_frame **frames, 433 u32 flags, bool ndo_xmit) 434{ 435 struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 436 int i, ret = -ENXIO, drops = 0; 437 struct net_device *rcv; 438 unsigned int max_len; 439 struct veth_rq *rq; 440 441 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 442 return -EINVAL; 443 444 rcu_read_lock(); 445 rcv = rcu_dereference(priv->peer); 446 if (unlikely(!rcv)) 447 goto out; 448 449 rcv_priv = netdev_priv(rcv); 450 rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 451 /* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive 452 * side. This means an XDP program is loaded on the peer and the peer 453 * device is up. 454 */ 455 if (!rcu_access_pointer(rq->xdp_prog)) 456 goto out; 457 458 max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN; 459 460 spin_lock(&rq->xdp_ring.producer_lock); 461 for (i = 0; i < n; i++) { 462 struct xdp_frame *frame = frames[i]; 463 void *ptr = veth_xdp_to_ptr(frame); 464 465 if (unlikely(frame->len > max_len || 466 __ptr_ring_produce(&rq->xdp_ring, ptr))) { 467 xdp_return_frame_rx_napi(frame); 468 drops++; 469 } 470 } 471 spin_unlock(&rq->xdp_ring.producer_lock); 472 473 if (flags & XDP_XMIT_FLUSH) 474 __veth_xdp_flush(rq); 475 476 ret = n - drops; 477 if (ndo_xmit) { 478 u64_stats_update_begin(&rq->stats.syncp); 479 rq->stats.vs.peer_tq_xdp_xmit += n - drops; 480 rq->stats.vs.peer_tq_xdp_xmit_err += drops; 481 u64_stats_update_end(&rq->stats.syncp); 482 } 483 484out: 485 rcu_read_unlock(); 486 487 return ret; 488} 489 490static int veth_ndo_xdp_xmit(struct net_device *dev, int n, 491 struct xdp_frame **frames, u32 flags) 492{ 493 int err; 494 495 err = veth_xdp_xmit(dev, n, frames, flags, true); 496 if (err < 0) { 497 struct veth_priv *priv = netdev_priv(dev); 498 499 atomic64_add(n, &priv->dropped); 500 } 501 502 return err; 503} 504 505static void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 506{ 507 int sent, i, err = 0; 508 509 sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false); 510 if (sent < 0) { 511 err = sent; 512 sent = 0; 513 for (i = 0; i < bq->count; i++) 514 xdp_return_frame(bq->q[i]); 515 } 516 trace_xdp_bulk_tx(rq->dev, sent, bq->count - sent, err); 517 518 u64_stats_update_begin(&rq->stats.syncp); 519 rq->stats.vs.xdp_tx += sent; 520 rq->stats.vs.xdp_tx_err += bq->count - sent; 521 u64_stats_update_end(&rq->stats.syncp); 522 523 bq->count = 0; 524} 525 526static void veth_xdp_flush(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 527{ 528 struct veth_priv *rcv_priv, *priv = netdev_priv(rq->dev); 529 struct net_device *rcv; 530 struct veth_rq *rcv_rq; 531 532 rcu_read_lock(); 533 veth_xdp_flush_bq(rq, bq); 534 rcv = rcu_dereference(priv->peer); 535 if (unlikely(!rcv)) 536 goto out; 537 538 rcv_priv = netdev_priv(rcv); 539 rcv_rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 540 /* xdp_ring is initialized on receive side? */ 541 if (unlikely(!rcu_access_pointer(rcv_rq->xdp_prog))) 542 goto out; 543 544 __veth_xdp_flush(rcv_rq); 545out: 546 rcu_read_unlock(); 547} 548 549static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp, 550 struct veth_xdp_tx_bq *bq) 551{ 552 struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp); 553 554 if (unlikely(!frame)) 555 return -EOVERFLOW; 556 557 if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE)) 558 veth_xdp_flush_bq(rq, bq); 559 560 bq->q[bq->count++] = frame; 561 562 return 0; 563} 564 565static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq, 566 struct xdp_frame *frame, 567 struct veth_xdp_tx_bq *bq, 568 struct veth_stats *stats) 569{ 570 void *hard_start = frame->data - frame->headroom; 571 int len = frame->len, delta = 0; 572 struct xdp_frame orig_frame; 573 struct bpf_prog *xdp_prog; 574 unsigned int headroom; 575 struct sk_buff *skb; 576 577 /* bpf_xdp_adjust_head() assures BPF cannot access xdp_frame area */ 578 hard_start -= sizeof(struct xdp_frame); 579 580 rcu_read_lock(); 581 xdp_prog = rcu_dereference(rq->xdp_prog); 582 if (likely(xdp_prog)) { 583 struct xdp_buff xdp; 584 u32 act; 585 586 xdp_convert_frame_to_buff(frame, &xdp); 587 xdp.rxq = &rq->xdp_rxq; 588 589 act = bpf_prog_run_xdp(xdp_prog, &xdp); 590 591 switch (act) { 592 case XDP_PASS: 593 delta = frame->data - xdp.data; 594 len = xdp.data_end - xdp.data; 595 break; 596 case XDP_TX: 597 orig_frame = *frame; 598 xdp.rxq->mem = frame->mem; 599 if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) { 600 trace_xdp_exception(rq->dev, xdp_prog, act); 601 frame = &orig_frame; 602 stats->rx_drops++; 603 goto err_xdp; 604 } 605 stats->xdp_tx++; 606 rcu_read_unlock(); 607 goto xdp_xmit; 608 case XDP_REDIRECT: 609 orig_frame = *frame; 610 xdp.rxq->mem = frame->mem; 611 if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) { 612 frame = &orig_frame; 613 stats->rx_drops++; 614 goto err_xdp; 615 } 616 stats->xdp_redirect++; 617 rcu_read_unlock(); 618 goto xdp_xmit; 619 default: 620 bpf_warn_invalid_xdp_action(act); 621 fallthrough; 622 case XDP_ABORTED: 623 trace_xdp_exception(rq->dev, xdp_prog, act); 624 fallthrough; 625 case XDP_DROP: 626 stats->xdp_drops++; 627 goto err_xdp; 628 } 629 } 630 rcu_read_unlock(); 631 632 headroom = sizeof(struct xdp_frame) + frame->headroom - delta; 633 skb = veth_build_skb(hard_start, headroom, len, frame->frame_sz); 634 if (!skb) { 635 xdp_return_frame(frame); 636 stats->rx_drops++; 637 goto err; 638 } 639 640 xdp_release_frame(frame); 641 xdp_scrub_frame(frame); 642 skb->protocol = eth_type_trans(skb, rq->dev); 643err: 644 return skb; 645err_xdp: 646 rcu_read_unlock(); 647 xdp_return_frame(frame); 648xdp_xmit: 649 return NULL; 650} 651 652static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, 653 struct sk_buff *skb, 654 struct veth_xdp_tx_bq *bq, 655 struct veth_stats *stats) 656{ 657 u32 pktlen, headroom, act, metalen; 658 void *orig_data, *orig_data_end; 659 struct bpf_prog *xdp_prog; 660 int mac_len, delta, off; 661 struct xdp_buff xdp; 662 663 skb_orphan(skb); 664 665 rcu_read_lock(); 666 xdp_prog = rcu_dereference(rq->xdp_prog); 667 if (unlikely(!xdp_prog)) { 668 rcu_read_unlock(); 669 goto out; 670 } 671 672 mac_len = skb->data - skb_mac_header(skb); 673 pktlen = skb->len + mac_len; 674 headroom = skb_headroom(skb) - mac_len; 675 676 if (skb_shared(skb) || skb_head_is_locked(skb) || 677 skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) { 678 struct sk_buff *nskb; 679 int size, head_off; 680 void *head, *start; 681 struct page *page; 682 683 size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) + 684 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 685 if (size > PAGE_SIZE) 686 goto drop; 687 688 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); 689 if (!page) 690 goto drop; 691 692 head = page_address(page); 693 start = head + VETH_XDP_HEADROOM; 694 if (skb_copy_bits(skb, -mac_len, start, pktlen)) { 695 page_frag_free(head); 696 goto drop; 697 } 698 699 nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len, 700 skb->len, PAGE_SIZE); 701 if (!nskb) { 702 page_frag_free(head); 703 goto drop; 704 } 705 706 skb_copy_header(nskb, skb); 707 head_off = skb_headroom(nskb) - skb_headroom(skb); 708 skb_headers_offset_update(nskb, head_off); 709 consume_skb(skb); 710 skb = nskb; 711 } 712 713 xdp.data_hard_start = skb->head; 714 xdp.data = skb_mac_header(skb); 715 xdp.data_end = xdp.data + pktlen; 716 xdp.data_meta = xdp.data; 717 xdp.rxq = &rq->xdp_rxq; 718 719 /* SKB "head" area always have tailroom for skb_shared_info */ 720 xdp.frame_sz = (void *)skb_end_pointer(skb) - xdp.data_hard_start; 721 xdp.frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 722 723 orig_data = xdp.data; 724 orig_data_end = xdp.data_end; 725 726 act = bpf_prog_run_xdp(xdp_prog, &xdp); 727 728 switch (act) { 729 case XDP_PASS: 730 break; 731 case XDP_TX: 732 get_page(virt_to_page(xdp.data)); 733 consume_skb(skb); 734 xdp.rxq->mem = rq->xdp_mem; 735 if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) { 736 trace_xdp_exception(rq->dev, xdp_prog, act); 737 stats->rx_drops++; 738 goto err_xdp; 739 } 740 stats->xdp_tx++; 741 rcu_read_unlock(); 742 goto xdp_xmit; 743 case XDP_REDIRECT: 744 get_page(virt_to_page(xdp.data)); 745 consume_skb(skb); 746 xdp.rxq->mem = rq->xdp_mem; 747 if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) { 748 stats->rx_drops++; 749 goto err_xdp; 750 } 751 stats->xdp_redirect++; 752 rcu_read_unlock(); 753 goto xdp_xmit; 754 default: 755 bpf_warn_invalid_xdp_action(act); 756 fallthrough; 757 case XDP_ABORTED: 758 trace_xdp_exception(rq->dev, xdp_prog, act); 759 fallthrough; 760 case XDP_DROP: 761 stats->xdp_drops++; 762 goto xdp_drop; 763 } 764 rcu_read_unlock(); 765 766 /* check if bpf_xdp_adjust_head was used */ 767 delta = orig_data - xdp.data; 768 off = mac_len + delta; 769 if (off > 0) 770 __skb_push(skb, off); 771 else if (off < 0) 772 __skb_pull(skb, -off); 773 skb->mac_header -= delta; 774 775 /* check if bpf_xdp_adjust_tail was used */ 776 off = xdp.data_end - orig_data_end; 777 if (off != 0) 778 __skb_put(skb, off); /* positive on grow, negative on shrink */ 779 skb->protocol = eth_type_trans(skb, rq->dev); 780 781 metalen = xdp.data - xdp.data_meta; 782 if (metalen) 783 skb_metadata_set(skb, metalen); 784out: 785 return skb; 786drop: 787 stats->rx_drops++; 788xdp_drop: 789 rcu_read_unlock(); 790 kfree_skb(skb); 791 return NULL; 792err_xdp: 793 rcu_read_unlock(); 794 page_frag_free(xdp.data); 795xdp_xmit: 796 return NULL; 797} 798 799static int veth_xdp_rcv(struct veth_rq *rq, int budget, 800 struct veth_xdp_tx_bq *bq, 801 struct veth_stats *stats) 802{ 803 int i, done = 0; 804 805 for (i = 0; i < budget; i++) { 806 void *ptr = __ptr_ring_consume(&rq->xdp_ring); 807 struct sk_buff *skb; 808 809 if (!ptr) 810 break; 811 812 if (veth_is_xdp_frame(ptr)) { 813 struct xdp_frame *frame = veth_ptr_to_xdp(ptr); 814 815 stats->xdp_bytes += frame->len; 816 skb = veth_xdp_rcv_one(rq, frame, bq, stats); 817 } else { 818 skb = ptr; 819 stats->xdp_bytes += skb->len; 820 skb = veth_xdp_rcv_skb(rq, skb, bq, stats); 821 } 822 823 if (skb) 824 napi_gro_receive(&rq->xdp_napi, skb); 825 826 done++; 827 } 828 829 u64_stats_update_begin(&rq->stats.syncp); 830 rq->stats.vs.xdp_redirect += stats->xdp_redirect; 831 rq->stats.vs.xdp_bytes += stats->xdp_bytes; 832 rq->stats.vs.xdp_drops += stats->xdp_drops; 833 rq->stats.vs.rx_drops += stats->rx_drops; 834 rq->stats.vs.xdp_packets += done; 835 u64_stats_update_end(&rq->stats.syncp); 836 837 return done; 838} 839 840static int veth_poll(struct napi_struct *napi, int budget) 841{ 842 struct veth_rq *rq = 843 container_of(napi, struct veth_rq, xdp_napi); 844 struct veth_stats stats = {}; 845 struct veth_xdp_tx_bq bq; 846 int done; 847 848 bq.count = 0; 849 850 xdp_set_return_frame_no_direct(); 851 done = veth_xdp_rcv(rq, budget, &bq, &stats); 852 853 if (stats.xdp_redirect > 0) 854 xdp_do_flush(); 855 856 if (done < budget && napi_complete_done(napi, done)) { 857 /* Write rx_notify_masked before reading ptr_ring */ 858 smp_store_mb(rq->rx_notify_masked, false); 859 if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { 860 rq->rx_notify_masked = true; 861 napi_schedule(&rq->xdp_napi); 862 } 863 } 864 865 if (stats.xdp_tx > 0) 866 veth_xdp_flush(rq, &bq); 867 xdp_clear_return_frame_no_direct(); 868 869 return done; 870} 871 872static int veth_napi_add(struct net_device *dev) 873{ 874 struct veth_priv *priv = netdev_priv(dev); 875 int err, i; 876 877 for (i = 0; i < dev->real_num_rx_queues; i++) { 878 struct veth_rq *rq = &priv->rq[i]; 879 880 err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL); 881 if (err) 882 goto err_xdp_ring; 883 } 884 885 for (i = 0; i < dev->real_num_rx_queues; i++) { 886 struct veth_rq *rq = &priv->rq[i]; 887 888 netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT); 889 napi_enable(&rq->xdp_napi); 890 } 891 892 return 0; 893err_xdp_ring: 894 for (i--; i >= 0; i--) 895 ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free); 896 897 return err; 898} 899 900static void veth_napi_del(struct net_device *dev) 901{ 902 struct veth_priv *priv = netdev_priv(dev); 903 int i; 904 905 for (i = 0; i < dev->real_num_rx_queues; i++) { 906 struct veth_rq *rq = &priv->rq[i]; 907 908 napi_disable(&rq->xdp_napi); 909 __netif_napi_del(&rq->xdp_napi); 910 } 911 synchronize_net(); 912 913 for (i = 0; i < dev->real_num_rx_queues; i++) { 914 struct veth_rq *rq = &priv->rq[i]; 915 916 rq->rx_notify_masked = false; 917 ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free); 918 } 919} 920 921static int veth_enable_xdp(struct net_device *dev) 922{ 923 struct veth_priv *priv = netdev_priv(dev); 924 int err, i; 925 926 if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { 927 for (i = 0; i < dev->real_num_rx_queues; i++) { 928 struct veth_rq *rq = &priv->rq[i]; 929 930 err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i); 931 if (err < 0) 932 goto err_rxq_reg; 933 934 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 935 MEM_TYPE_PAGE_SHARED, 936 NULL); 937 if (err < 0) 938 goto err_reg_mem; 939 940 /* Save original mem info as it can be overwritten */ 941 rq->xdp_mem = rq->xdp_rxq.mem; 942 } 943 944 err = veth_napi_add(dev); 945 if (err) 946 goto err_rxq_reg; 947 } 948 949 for (i = 0; i < dev->real_num_rx_queues; i++) 950 rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog); 951 952 return 0; 953err_reg_mem: 954 xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); 955err_rxq_reg: 956 for (i--; i >= 0; i--) 957 xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); 958 959 return err; 960} 961 962static void veth_disable_xdp(struct net_device *dev) 963{ 964 struct veth_priv *priv = netdev_priv(dev); 965 int i; 966 967 for (i = 0; i < dev->real_num_rx_queues; i++) 968 rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); 969 veth_napi_del(dev); 970 for (i = 0; i < dev->real_num_rx_queues; i++) { 971 struct veth_rq *rq = &priv->rq[i]; 972 973 rq->xdp_rxq.mem = rq->xdp_mem; 974 xdp_rxq_info_unreg(&rq->xdp_rxq); 975 } 976} 977 978static int veth_open(struct net_device *dev) 979{ 980 struct veth_priv *priv = netdev_priv(dev); 981 struct net_device *peer = rtnl_dereference(priv->peer); 982 int err; 983 984 if (!peer) 985 return -ENOTCONN; 986 987 if (priv->_xdp_prog) { 988 err = veth_enable_xdp(dev); 989 if (err) 990 return err; 991 } 992 993 if (peer->flags & IFF_UP) { 994 netif_carrier_on(dev); 995 netif_carrier_on(peer); 996 } 997 998 return 0; 999} 1000 1001static int veth_close(struct net_device *dev) 1002{ 1003 struct veth_priv *priv = netdev_priv(dev); 1004 struct net_device *peer = rtnl_dereference(priv->peer); 1005 1006 netif_carrier_off(dev); 1007 if (peer) 1008 netif_carrier_off(peer); 1009 1010 if (priv->_xdp_prog) 1011 veth_disable_xdp(dev); 1012 1013 return 0; 1014} 1015 1016static int is_valid_veth_mtu(int mtu) 1017{ 1018 return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 1019} 1020 1021static int veth_alloc_queues(struct net_device *dev) 1022{ 1023 struct veth_priv *priv = netdev_priv(dev); 1024 int i; 1025 1026 priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL); 1027 if (!priv->rq) 1028 return -ENOMEM; 1029 1030 for (i = 0; i < dev->num_rx_queues; i++) { 1031 priv->rq[i].dev = dev; 1032 u64_stats_init(&priv->rq[i].stats.syncp); 1033 } 1034 1035 return 0; 1036} 1037 1038static void veth_free_queues(struct net_device *dev) 1039{ 1040 struct veth_priv *priv = netdev_priv(dev); 1041 1042 kfree(priv->rq); 1043} 1044 1045static int veth_dev_init(struct net_device *dev) 1046{ 1047 int err; 1048 1049 dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); 1050 if (!dev->lstats) 1051 return -ENOMEM; 1052 1053 err = veth_alloc_queues(dev); 1054 if (err) { 1055 free_percpu(dev->lstats); 1056 return err; 1057 } 1058 1059 return 0; 1060} 1061 1062static void veth_dev_free(struct net_device *dev) 1063{ 1064 veth_free_queues(dev); 1065 free_percpu(dev->lstats); 1066} 1067 1068#ifdef CONFIG_NET_POLL_CONTROLLER 1069static void veth_poll_controller(struct net_device *dev) 1070{ 1071 /* veth only receives frames when its peer sends one 1072 * Since it has nothing to do with disabling irqs, we are guaranteed 1073 * never to have pending data when we poll for it so 1074 * there is nothing to do here. 1075 * 1076 * We need this though so netpoll recognizes us as an interface that 1077 * supports polling, which enables bridge devices in virt setups to 1078 * still use netconsole 1079 */ 1080} 1081#endif /* CONFIG_NET_POLL_CONTROLLER */ 1082 1083static int veth_get_iflink(const struct net_device *dev) 1084{ 1085 struct veth_priv *priv = netdev_priv(dev); 1086 struct net_device *peer; 1087 int iflink; 1088 1089 rcu_read_lock(); 1090 peer = rcu_dereference(priv->peer); 1091 iflink = peer ? peer->ifindex : 0; 1092 rcu_read_unlock(); 1093 1094 return iflink; 1095} 1096 1097static netdev_features_t veth_fix_features(struct net_device *dev, 1098 netdev_features_t features) 1099{ 1100 struct veth_priv *priv = netdev_priv(dev); 1101 struct net_device *peer; 1102 1103 peer = rtnl_dereference(priv->peer); 1104 if (peer) { 1105 struct veth_priv *peer_priv = netdev_priv(peer); 1106 1107 if (peer_priv->_xdp_prog) 1108 features &= ~NETIF_F_GSO_SOFTWARE; 1109 } 1110 1111 return features; 1112} 1113 1114static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 1115{ 1116 struct veth_priv *peer_priv, *priv = netdev_priv(dev); 1117 struct net_device *peer; 1118 1119 if (new_hr < 0) 1120 new_hr = 0; 1121 1122 rcu_read_lock(); 1123 peer = rcu_dereference(priv->peer); 1124 if (unlikely(!peer)) 1125 goto out; 1126 1127 peer_priv = netdev_priv(peer); 1128 priv->requested_headroom = new_hr; 1129 new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 1130 dev->needed_headroom = new_hr; 1131 peer->needed_headroom = new_hr; 1132 1133out: 1134 rcu_read_unlock(); 1135} 1136 1137static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, 1138 struct netlink_ext_ack *extack) 1139{ 1140 struct veth_priv *priv = netdev_priv(dev); 1141 struct bpf_prog *old_prog; 1142 struct net_device *peer; 1143 unsigned int max_mtu; 1144 int err; 1145 1146 old_prog = priv->_xdp_prog; 1147 priv->_xdp_prog = prog; 1148 peer = rtnl_dereference(priv->peer); 1149 1150 if (prog) { 1151 if (!peer) { 1152 NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached"); 1153 err = -ENOTCONN; 1154 goto err; 1155 } 1156 1157 max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM - 1158 peer->hard_header_len - 1159 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1160 if (peer->mtu > max_mtu) { 1161 NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP"); 1162 err = -ERANGE; 1163 goto err; 1164 } 1165 1166 if (dev->real_num_rx_queues < peer->real_num_tx_queues) { 1167 NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues"); 1168 err = -ENOSPC; 1169 goto err; 1170 } 1171 1172 if (dev->flags & IFF_UP) { 1173 err = veth_enable_xdp(dev); 1174 if (err) { 1175 NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed"); 1176 goto err; 1177 } 1178 } 1179 1180 if (!old_prog) { 1181 peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; 1182 peer->max_mtu = max_mtu; 1183 } 1184 } 1185 1186 if (old_prog) { 1187 if (!prog) { 1188 if (dev->flags & IFF_UP) 1189 veth_disable_xdp(dev); 1190 1191 if (peer) { 1192 peer->hw_features |= NETIF_F_GSO_SOFTWARE; 1193 peer->max_mtu = ETH_MAX_MTU; 1194 } 1195 } 1196 bpf_prog_put(old_prog); 1197 } 1198 1199 if ((!!old_prog ^ !!prog) && peer) 1200 netdev_update_features(peer); 1201 1202 return 0; 1203err: 1204 priv->_xdp_prog = old_prog; 1205 1206 return err; 1207} 1208 1209static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1210{ 1211 switch (xdp->command) { 1212 case XDP_SETUP_PROG: 1213 return veth_xdp_set(dev, xdp->prog, xdp->extack); 1214 default: 1215 return -EINVAL; 1216 } 1217} 1218 1219static const struct net_device_ops veth_netdev_ops = { 1220 .ndo_init = veth_dev_init, 1221 .ndo_open = veth_open, 1222 .ndo_stop = veth_close, 1223 .ndo_start_xmit = veth_xmit, 1224 .ndo_get_stats64 = veth_get_stats64, 1225 .ndo_set_rx_mode = veth_set_multicast_list, 1226 .ndo_set_mac_address = eth_mac_addr, 1227#ifdef CONFIG_NET_POLL_CONTROLLER 1228 .ndo_poll_controller = veth_poll_controller, 1229#endif 1230 .ndo_get_iflink = veth_get_iflink, 1231 .ndo_fix_features = veth_fix_features, 1232 .ndo_features_check = passthru_features_check, 1233 .ndo_set_rx_headroom = veth_set_rx_headroom, 1234 .ndo_bpf = veth_xdp, 1235 .ndo_xdp_xmit = veth_ndo_xdp_xmit, 1236 .ndo_get_peer_dev = veth_peer_dev, 1237}; 1238 1239#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 1240 NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 1241 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 1242 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 1243 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 1244 1245static void veth_setup(struct net_device *dev) 1246{ 1247 ether_setup(dev); 1248 1249 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1250 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1251 dev->priv_flags |= IFF_NO_QUEUE; 1252 dev->priv_flags |= IFF_PHONY_HEADROOM; 1253 1254 dev->netdev_ops = &veth_netdev_ops; 1255 dev->ethtool_ops = &veth_ethtool_ops; 1256 dev->features |= NETIF_F_LLTX; 1257 dev->features |= VETH_FEATURES; 1258 dev->vlan_features = dev->features & 1259 ~(NETIF_F_HW_VLAN_CTAG_TX | 1260 NETIF_F_HW_VLAN_STAG_TX | 1261 NETIF_F_HW_VLAN_CTAG_RX | 1262 NETIF_F_HW_VLAN_STAG_RX); 1263 dev->needs_free_netdev = true; 1264 dev->priv_destructor = veth_dev_free; 1265 dev->max_mtu = ETH_MAX_MTU; 1266 1267 dev->hw_features = VETH_FEATURES; 1268 dev->hw_enc_features = VETH_FEATURES; 1269 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 1270} 1271 1272/* 1273 * netlink interface 1274 */ 1275 1276static int veth_validate(struct nlattr *tb[], struct nlattr *data[], 1277 struct netlink_ext_ack *extack) 1278{ 1279 if (tb[IFLA_ADDRESS]) { 1280 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1281 return -EINVAL; 1282 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1283 return -EADDRNOTAVAIL; 1284 } 1285 if (tb[IFLA_MTU]) { 1286 if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 1287 return -EINVAL; 1288 } 1289 return 0; 1290} 1291 1292static struct rtnl_link_ops veth_link_ops; 1293 1294static int veth_newlink(struct net *src_net, struct net_device *dev, 1295 struct nlattr *tb[], struct nlattr *data[], 1296 struct netlink_ext_ack *extack) 1297{ 1298 int err; 1299 struct net_device *peer; 1300 struct veth_priv *priv; 1301 char ifname[IFNAMSIZ]; 1302 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 1303 unsigned char name_assign_type; 1304 struct ifinfomsg *ifmp; 1305 struct net *net; 1306 1307 /* 1308 * create and register peer first 1309 */ 1310 if (data != NULL && data[VETH_INFO_PEER] != NULL) { 1311 struct nlattr *nla_peer; 1312 1313 nla_peer = data[VETH_INFO_PEER]; 1314 ifmp = nla_data(nla_peer); 1315 err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack); 1316 if (err < 0) 1317 return err; 1318 1319 err = veth_validate(peer_tb, NULL, extack); 1320 if (err < 0) 1321 return err; 1322 1323 tbp = peer_tb; 1324 } else { 1325 ifmp = NULL; 1326 tbp = tb; 1327 } 1328 1329 if (ifmp && tbp[IFLA_IFNAME]) { 1330 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 1331 name_assign_type = NET_NAME_USER; 1332 } else { 1333 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 1334 name_assign_type = NET_NAME_ENUM; 1335 } 1336 1337 net = rtnl_link_get_net(src_net, tbp); 1338 if (IS_ERR(net)) 1339 return PTR_ERR(net); 1340 1341 peer = rtnl_create_link(net, ifname, name_assign_type, 1342 &veth_link_ops, tbp, extack); 1343 if (IS_ERR(peer)) { 1344 put_net(net); 1345 return PTR_ERR(peer); 1346 } 1347 1348 if (!ifmp || !tbp[IFLA_ADDRESS]) 1349 eth_hw_addr_random(peer); 1350 1351 if (ifmp && (dev->ifindex != 0)) 1352 peer->ifindex = ifmp->ifi_index; 1353 1354 peer->gso_max_size = dev->gso_max_size; 1355 peer->gso_max_segs = dev->gso_max_segs; 1356 1357 err = register_netdevice(peer); 1358 put_net(net); 1359 net = NULL; 1360 if (err < 0) 1361 goto err_register_peer; 1362 1363 netif_carrier_off(peer); 1364 1365 err = rtnl_configure_link(peer, ifmp); 1366 if (err < 0) 1367 goto err_configure_peer; 1368 1369 /* 1370 * register dev last 1371 * 1372 * note, that since we've registered new device the dev's name 1373 * should be re-allocated 1374 */ 1375 1376 if (tb[IFLA_ADDRESS] == NULL) 1377 eth_hw_addr_random(dev); 1378 1379 if (tb[IFLA_IFNAME]) 1380 nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 1381 else 1382 snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 1383 1384 err = register_netdevice(dev); 1385 if (err < 0) 1386 goto err_register_dev; 1387 1388 netif_carrier_off(dev); 1389 1390 /* 1391 * tie the deviced together 1392 */ 1393 1394 priv = netdev_priv(dev); 1395 rcu_assign_pointer(priv->peer, peer); 1396 1397 priv = netdev_priv(peer); 1398 rcu_assign_pointer(priv->peer, dev); 1399 1400 return 0; 1401 1402err_register_dev: 1403 /* nothing to do */ 1404err_configure_peer: 1405 unregister_netdevice(peer); 1406 return err; 1407 1408err_register_peer: 1409 free_netdev(peer); 1410 return err; 1411} 1412 1413static void veth_dellink(struct net_device *dev, struct list_head *head) 1414{ 1415 struct veth_priv *priv; 1416 struct net_device *peer; 1417 1418 priv = netdev_priv(dev); 1419 peer = rtnl_dereference(priv->peer); 1420 1421 /* Note : dellink() is called from default_device_exit_batch(), 1422 * before a rcu_synchronize() point. The devices are guaranteed 1423 * not being freed before one RCU grace period. 1424 */ 1425 RCU_INIT_POINTER(priv->peer, NULL); 1426 unregister_netdevice_queue(dev, head); 1427 1428 if (peer) { 1429 priv = netdev_priv(peer); 1430 RCU_INIT_POINTER(priv->peer, NULL); 1431 unregister_netdevice_queue(peer, head); 1432 } 1433} 1434 1435static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 1436 [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 1437}; 1438 1439static struct net *veth_get_link_net(const struct net_device *dev) 1440{ 1441 struct veth_priv *priv = netdev_priv(dev); 1442 struct net_device *peer = rtnl_dereference(priv->peer); 1443 1444 return peer ? dev_net(peer) : dev_net(dev); 1445} 1446 1447static struct rtnl_link_ops veth_link_ops = { 1448 .kind = DRV_NAME, 1449 .priv_size = sizeof(struct veth_priv), 1450 .setup = veth_setup, 1451 .validate = veth_validate, 1452 .newlink = veth_newlink, 1453 .dellink = veth_dellink, 1454 .policy = veth_policy, 1455 .maxtype = VETH_INFO_MAX, 1456 .get_link_net = veth_get_link_net, 1457}; 1458 1459/* 1460 * init/fini 1461 */ 1462 1463static __init int veth_init(void) 1464{ 1465 return rtnl_link_register(&veth_link_ops); 1466} 1467 1468static __exit void veth_exit(void) 1469{ 1470 rtnl_link_unregister(&veth_link_ops); 1471} 1472 1473module_init(veth_init); 1474module_exit(veth_exit); 1475 1476MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 1477MODULE_LICENSE("GPL v2"); 1478MODULE_ALIAS_RTNL_LINK(DRV_NAME); 1479