162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * drivers/net/veth.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Author: Pavel Emelianov <xemul@openvz.org> 862306a36Sopenharmony_ci * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include <linux/netdevice.h> 1362306a36Sopenharmony_ci#include <linux/slab.h> 1462306a36Sopenharmony_ci#include <linux/ethtool.h> 1562306a36Sopenharmony_ci#include <linux/etherdevice.h> 1662306a36Sopenharmony_ci#include <linux/u64_stats_sync.h> 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci#include <net/rtnetlink.h> 1962306a36Sopenharmony_ci#include <net/dst.h> 2062306a36Sopenharmony_ci#include <net/xfrm.h> 2162306a36Sopenharmony_ci#include <net/xdp.h> 2262306a36Sopenharmony_ci#include <linux/veth.h> 2362306a36Sopenharmony_ci#include <linux/module.h> 2462306a36Sopenharmony_ci#include <linux/bpf.h> 2562306a36Sopenharmony_ci#include <linux/filter.h> 2662306a36Sopenharmony_ci#include <linux/ptr_ring.h> 2762306a36Sopenharmony_ci#include <linux/bpf_trace.h> 2862306a36Sopenharmony_ci#include <linux/net_tstamp.h> 2962306a36Sopenharmony_ci#include <net/page_pool/helpers.h> 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#define DRV_NAME "veth" 3262306a36Sopenharmony_ci#define DRV_VERSION "1.0" 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci#define VETH_XDP_FLAG BIT(0) 3562306a36Sopenharmony_ci#define VETH_RING_SIZE 256 3662306a36Sopenharmony_ci#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci#define VETH_XDP_TX_BULK_SIZE 16 3962306a36Sopenharmony_ci#define VETH_XDP_BATCH 16 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistruct veth_stats { 4262306a36Sopenharmony_ci u64 rx_drops; 4362306a36Sopenharmony_ci /* xdp */ 4462306a36Sopenharmony_ci u64 xdp_packets; 4562306a36Sopenharmony_ci u64 xdp_bytes; 4662306a36Sopenharmony_ci u64 xdp_redirect; 4762306a36Sopenharmony_ci u64 xdp_drops; 4862306a36Sopenharmony_ci u64 xdp_tx; 4962306a36Sopenharmony_ci u64 xdp_tx_err; 5062306a36Sopenharmony_ci u64 peer_tq_xdp_xmit; 5162306a36Sopenharmony_ci u64 peer_tq_xdp_xmit_err; 5262306a36Sopenharmony_ci}; 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_cistruct veth_rq_stats { 5562306a36Sopenharmony_ci struct veth_stats vs; 5662306a36Sopenharmony_ci struct u64_stats_sync syncp; 5762306a36Sopenharmony_ci}; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_cistruct veth_rq { 6062306a36Sopenharmony_ci struct napi_struct xdp_napi; 6162306a36Sopenharmony_ci struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */ 6262306a36Sopenharmony_ci struct net_device *dev; 6362306a36Sopenharmony_ci struct bpf_prog __rcu *xdp_prog; 6462306a36Sopenharmony_ci struct xdp_mem_info xdp_mem; 6562306a36Sopenharmony_ci struct veth_rq_stats stats; 6662306a36Sopenharmony_ci bool rx_notify_masked; 6762306a36Sopenharmony_ci struct ptr_ring xdp_ring; 6862306a36Sopenharmony_ci struct xdp_rxq_info xdp_rxq; 6962306a36Sopenharmony_ci struct page_pool *page_pool; 7062306a36Sopenharmony_ci}; 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_cistruct veth_priv { 7362306a36Sopenharmony_ci struct net_device __rcu *peer; 7462306a36Sopenharmony_ci atomic64_t dropped; 7562306a36Sopenharmony_ci struct bpf_prog *_xdp_prog; 7662306a36Sopenharmony_ci struct veth_rq *rq; 7762306a36Sopenharmony_ci unsigned int requested_headroom; 7862306a36Sopenharmony_ci}; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_cistruct veth_xdp_tx_bq { 8162306a36Sopenharmony_ci struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE]; 8262306a36Sopenharmony_ci unsigned int count; 8362306a36Sopenharmony_ci}; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci/* 8662306a36Sopenharmony_ci * ethtool interface 8762306a36Sopenharmony_ci */ 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_cistruct veth_q_stat_desc { 9062306a36Sopenharmony_ci char desc[ETH_GSTRING_LEN]; 9162306a36Sopenharmony_ci size_t offset; 9262306a36Sopenharmony_ci}; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci#define VETH_RQ_STAT(m) offsetof(struct veth_stats, m) 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_cistatic const struct veth_q_stat_desc veth_rq_stats_desc[] = { 9762306a36Sopenharmony_ci { "xdp_packets", VETH_RQ_STAT(xdp_packets) }, 9862306a36Sopenharmony_ci { "xdp_bytes", VETH_RQ_STAT(xdp_bytes) }, 9962306a36Sopenharmony_ci { "drops", VETH_RQ_STAT(rx_drops) }, 10062306a36Sopenharmony_ci { "xdp_redirect", VETH_RQ_STAT(xdp_redirect) }, 10162306a36Sopenharmony_ci { "xdp_drops", VETH_RQ_STAT(xdp_drops) }, 10262306a36Sopenharmony_ci { "xdp_tx", VETH_RQ_STAT(xdp_tx) }, 10362306a36Sopenharmony_ci { "xdp_tx_errors", VETH_RQ_STAT(xdp_tx_err) }, 10462306a36Sopenharmony_ci}; 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci#define VETH_RQ_STATS_LEN ARRAY_SIZE(veth_rq_stats_desc) 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_cistatic const struct veth_q_stat_desc veth_tq_stats_desc[] = { 10962306a36Sopenharmony_ci { "xdp_xmit", VETH_RQ_STAT(peer_tq_xdp_xmit) }, 11062306a36Sopenharmony_ci { "xdp_xmit_errors", VETH_RQ_STAT(peer_tq_xdp_xmit_err) }, 11162306a36Sopenharmony_ci}; 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci#define VETH_TQ_STATS_LEN ARRAY_SIZE(veth_tq_stats_desc) 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_cistatic struct { 11662306a36Sopenharmony_ci const char string[ETH_GSTRING_LEN]; 11762306a36Sopenharmony_ci} ethtool_stats_keys[] = { 11862306a36Sopenharmony_ci { "peer_ifindex" }, 11962306a36Sopenharmony_ci}; 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_cistruct veth_xdp_buff { 12262306a36Sopenharmony_ci struct xdp_buff xdp; 12362306a36Sopenharmony_ci struct sk_buff *skb; 12462306a36Sopenharmony_ci}; 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_cistatic int veth_get_link_ksettings(struct net_device *dev, 12762306a36Sopenharmony_ci struct ethtool_link_ksettings *cmd) 12862306a36Sopenharmony_ci{ 12962306a36Sopenharmony_ci cmd->base.speed = SPEED_10000; 13062306a36Sopenharmony_ci cmd->base.duplex = DUPLEX_FULL; 13162306a36Sopenharmony_ci cmd->base.port = PORT_TP; 13262306a36Sopenharmony_ci cmd->base.autoneg = AUTONEG_DISABLE; 13362306a36Sopenharmony_ci return 0; 13462306a36Sopenharmony_ci} 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_cistatic void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 13762306a36Sopenharmony_ci{ 13862306a36Sopenharmony_ci strscpy(info->driver, DRV_NAME, sizeof(info->driver)); 13962306a36Sopenharmony_ci strscpy(info->version, DRV_VERSION, sizeof(info->version)); 14062306a36Sopenharmony_ci} 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_cistatic void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 14362306a36Sopenharmony_ci{ 14462306a36Sopenharmony_ci u8 *p = buf; 14562306a36Sopenharmony_ci int i, j; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci switch(stringset) { 14862306a36Sopenharmony_ci case ETH_SS_STATS: 14962306a36Sopenharmony_ci memcpy(p, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 15062306a36Sopenharmony_ci p += sizeof(ethtool_stats_keys); 15162306a36Sopenharmony_ci for (i = 0; i < dev->real_num_rx_queues; i++) 15262306a36Sopenharmony_ci for (j = 0; j < VETH_RQ_STATS_LEN; j++) 15362306a36Sopenharmony_ci ethtool_sprintf(&p, "rx_queue_%u_%.18s", 15462306a36Sopenharmony_ci i, veth_rq_stats_desc[j].desc); 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci for (i = 0; i < dev->real_num_tx_queues; i++) 15762306a36Sopenharmony_ci for (j = 0; j < VETH_TQ_STATS_LEN; j++) 15862306a36Sopenharmony_ci ethtool_sprintf(&p, "tx_queue_%u_%.18s", 15962306a36Sopenharmony_ci i, veth_tq_stats_desc[j].desc); 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci page_pool_ethtool_stats_get_strings(p); 16262306a36Sopenharmony_ci break; 16362306a36Sopenharmony_ci } 16462306a36Sopenharmony_ci} 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_cistatic int veth_get_sset_count(struct net_device *dev, int sset) 16762306a36Sopenharmony_ci{ 16862306a36Sopenharmony_ci switch (sset) { 16962306a36Sopenharmony_ci case ETH_SS_STATS: 17062306a36Sopenharmony_ci return ARRAY_SIZE(ethtool_stats_keys) + 17162306a36Sopenharmony_ci VETH_RQ_STATS_LEN * dev->real_num_rx_queues + 17262306a36Sopenharmony_ci VETH_TQ_STATS_LEN * dev->real_num_tx_queues + 17362306a36Sopenharmony_ci page_pool_ethtool_stats_get_count(); 17462306a36Sopenharmony_ci default: 17562306a36Sopenharmony_ci return -EOPNOTSUPP; 17662306a36Sopenharmony_ci } 17762306a36Sopenharmony_ci} 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_cistatic void veth_get_page_pool_stats(struct net_device *dev, u64 *data) 18062306a36Sopenharmony_ci{ 18162306a36Sopenharmony_ci#ifdef CONFIG_PAGE_POOL_STATS 18262306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 18362306a36Sopenharmony_ci struct page_pool_stats pp_stats = {}; 18462306a36Sopenharmony_ci int i; 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci for (i = 0; i < dev->real_num_rx_queues; i++) { 18762306a36Sopenharmony_ci if (!priv->rq[i].page_pool) 18862306a36Sopenharmony_ci continue; 18962306a36Sopenharmony_ci page_pool_get_stats(priv->rq[i].page_pool, &pp_stats); 19062306a36Sopenharmony_ci } 19162306a36Sopenharmony_ci page_pool_ethtool_stats_get(data, &pp_stats); 19262306a36Sopenharmony_ci#endif /* CONFIG_PAGE_POOL_STATS */ 19362306a36Sopenharmony_ci} 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_cistatic void veth_get_ethtool_stats(struct net_device *dev, 19662306a36Sopenharmony_ci struct ethtool_stats *stats, u64 *data) 19762306a36Sopenharmony_ci{ 19862306a36Sopenharmony_ci struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 19962306a36Sopenharmony_ci struct net_device *peer = rtnl_dereference(priv->peer); 20062306a36Sopenharmony_ci int i, j, idx, pp_idx; 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci data[0] = peer ? peer->ifindex : 0; 20362306a36Sopenharmony_ci idx = 1; 20462306a36Sopenharmony_ci for (i = 0; i < dev->real_num_rx_queues; i++) { 20562306a36Sopenharmony_ci const struct veth_rq_stats *rq_stats = &priv->rq[i].stats; 20662306a36Sopenharmony_ci const void *stats_base = (void *)&rq_stats->vs; 20762306a36Sopenharmony_ci unsigned int start; 20862306a36Sopenharmony_ci size_t offset; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci do { 21162306a36Sopenharmony_ci start = u64_stats_fetch_begin(&rq_stats->syncp); 21262306a36Sopenharmony_ci for (j = 0; j < VETH_RQ_STATS_LEN; j++) { 21362306a36Sopenharmony_ci offset = veth_rq_stats_desc[j].offset; 21462306a36Sopenharmony_ci data[idx + j] = *(u64 *)(stats_base + offset); 21562306a36Sopenharmony_ci } 21662306a36Sopenharmony_ci } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); 21762306a36Sopenharmony_ci idx += VETH_RQ_STATS_LEN; 21862306a36Sopenharmony_ci } 21962306a36Sopenharmony_ci pp_idx = idx; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci if (!peer) 22262306a36Sopenharmony_ci goto page_pool_stats; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci rcv_priv = netdev_priv(peer); 22562306a36Sopenharmony_ci for (i = 0; i < peer->real_num_rx_queues; i++) { 22662306a36Sopenharmony_ci const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats; 22762306a36Sopenharmony_ci const void *base = (void *)&rq_stats->vs; 22862306a36Sopenharmony_ci unsigned int start, tx_idx = idx; 22962306a36Sopenharmony_ci size_t offset; 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN; 23262306a36Sopenharmony_ci do { 23362306a36Sopenharmony_ci start = u64_stats_fetch_begin(&rq_stats->syncp); 23462306a36Sopenharmony_ci for (j = 0; j < VETH_TQ_STATS_LEN; j++) { 23562306a36Sopenharmony_ci offset = veth_tq_stats_desc[j].offset; 23662306a36Sopenharmony_ci data[tx_idx + j] += *(u64 *)(base + offset); 23762306a36Sopenharmony_ci } 23862306a36Sopenharmony_ci } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); 23962306a36Sopenharmony_ci } 24062306a36Sopenharmony_ci pp_idx = idx + dev->real_num_tx_queues * VETH_TQ_STATS_LEN; 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_cipage_pool_stats: 24362306a36Sopenharmony_ci veth_get_page_pool_stats(dev, &data[pp_idx]); 24462306a36Sopenharmony_ci} 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_cistatic void veth_get_channels(struct net_device *dev, 24762306a36Sopenharmony_ci struct ethtool_channels *channels) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci channels->tx_count = dev->real_num_tx_queues; 25062306a36Sopenharmony_ci channels->rx_count = dev->real_num_rx_queues; 25162306a36Sopenharmony_ci channels->max_tx = dev->num_tx_queues; 25262306a36Sopenharmony_ci channels->max_rx = dev->num_rx_queues; 25362306a36Sopenharmony_ci} 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_cistatic int veth_set_channels(struct net_device *dev, 25662306a36Sopenharmony_ci struct ethtool_channels *ch); 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_cistatic const struct ethtool_ops veth_ethtool_ops = { 25962306a36Sopenharmony_ci .get_drvinfo = veth_get_drvinfo, 26062306a36Sopenharmony_ci .get_link = ethtool_op_get_link, 26162306a36Sopenharmony_ci .get_strings = veth_get_strings, 26262306a36Sopenharmony_ci .get_sset_count = veth_get_sset_count, 26362306a36Sopenharmony_ci .get_ethtool_stats = veth_get_ethtool_stats, 26462306a36Sopenharmony_ci .get_link_ksettings = veth_get_link_ksettings, 26562306a36Sopenharmony_ci .get_ts_info = ethtool_op_get_ts_info, 26662306a36Sopenharmony_ci .get_channels = veth_get_channels, 26762306a36Sopenharmony_ci .set_channels = veth_set_channels, 26862306a36Sopenharmony_ci}; 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci/* general routines */ 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_cistatic bool veth_is_xdp_frame(void *ptr) 27362306a36Sopenharmony_ci{ 27462306a36Sopenharmony_ci return (unsigned long)ptr & VETH_XDP_FLAG; 27562306a36Sopenharmony_ci} 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_cistatic struct xdp_frame *veth_ptr_to_xdp(void *ptr) 27862306a36Sopenharmony_ci{ 27962306a36Sopenharmony_ci return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG); 28062306a36Sopenharmony_ci} 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_cistatic void *veth_xdp_to_ptr(struct xdp_frame *xdp) 28362306a36Sopenharmony_ci{ 28462306a36Sopenharmony_ci return (void *)((unsigned long)xdp | VETH_XDP_FLAG); 28562306a36Sopenharmony_ci} 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_cistatic void veth_ptr_free(void *ptr) 28862306a36Sopenharmony_ci{ 28962306a36Sopenharmony_ci if (veth_is_xdp_frame(ptr)) 29062306a36Sopenharmony_ci xdp_return_frame(veth_ptr_to_xdp(ptr)); 29162306a36Sopenharmony_ci else 29262306a36Sopenharmony_ci kfree_skb(ptr); 29362306a36Sopenharmony_ci} 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_cistatic void __veth_xdp_flush(struct veth_rq *rq) 29662306a36Sopenharmony_ci{ 29762306a36Sopenharmony_ci /* Write ptr_ring before reading rx_notify_masked */ 29862306a36Sopenharmony_ci smp_mb(); 29962306a36Sopenharmony_ci if (!READ_ONCE(rq->rx_notify_masked) && 30062306a36Sopenharmony_ci napi_schedule_prep(&rq->xdp_napi)) { 30162306a36Sopenharmony_ci WRITE_ONCE(rq->rx_notify_masked, true); 30262306a36Sopenharmony_ci __napi_schedule(&rq->xdp_napi); 30362306a36Sopenharmony_ci } 30462306a36Sopenharmony_ci} 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_cistatic int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb) 30762306a36Sopenharmony_ci{ 30862306a36Sopenharmony_ci if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) { 30962306a36Sopenharmony_ci dev_kfree_skb_any(skb); 31062306a36Sopenharmony_ci return NET_RX_DROP; 31162306a36Sopenharmony_ci } 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci return NET_RX_SUCCESS; 31462306a36Sopenharmony_ci} 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_cistatic int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, 31762306a36Sopenharmony_ci struct veth_rq *rq, bool xdp) 31862306a36Sopenharmony_ci{ 31962306a36Sopenharmony_ci return __dev_forward_skb(dev, skb) ?: xdp ? 32062306a36Sopenharmony_ci veth_xdp_rx(rq, skb) : 32162306a36Sopenharmony_ci __netif_rx(skb); 32262306a36Sopenharmony_ci} 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci/* return true if the specified skb has chances of GRO aggregation 32562306a36Sopenharmony_ci * Don't strive for accuracy, but try to avoid GRO overhead in the most 32662306a36Sopenharmony_ci * common scenarios. 32762306a36Sopenharmony_ci * When XDP is enabled, all traffic is considered eligible, as the xmit 32862306a36Sopenharmony_ci * device has TSO off. 32962306a36Sopenharmony_ci * When TSO is enabled on the xmit device, we are likely interested only 33062306a36Sopenharmony_ci * in UDP aggregation, explicitly check for that if the skb is suspected 33162306a36Sopenharmony_ci * - the sock_wfree destructor is used by UDP, ICMP and XDP sockets - 33262306a36Sopenharmony_ci * to belong to locally generated UDP traffic. 33362306a36Sopenharmony_ci */ 33462306a36Sopenharmony_cistatic bool veth_skb_is_eligible_for_gro(const struct net_device *dev, 33562306a36Sopenharmony_ci const struct net_device *rcv, 33662306a36Sopenharmony_ci const struct sk_buff *skb) 33762306a36Sopenharmony_ci{ 33862306a36Sopenharmony_ci return !(dev->features & NETIF_F_ALL_TSO) || 33962306a36Sopenharmony_ci (skb->destructor == sock_wfree && 34062306a36Sopenharmony_ci rcv->features & (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD)); 34162306a36Sopenharmony_ci} 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_cistatic netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 34662306a36Sopenharmony_ci struct veth_rq *rq = NULL; 34762306a36Sopenharmony_ci int ret = NETDEV_TX_OK; 34862306a36Sopenharmony_ci struct net_device *rcv; 34962306a36Sopenharmony_ci int length = skb->len; 35062306a36Sopenharmony_ci bool use_napi = false; 35162306a36Sopenharmony_ci int rxq; 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci rcu_read_lock(); 35462306a36Sopenharmony_ci rcv = rcu_dereference(priv->peer); 35562306a36Sopenharmony_ci if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { 35662306a36Sopenharmony_ci kfree_skb(skb); 35762306a36Sopenharmony_ci goto drop; 35862306a36Sopenharmony_ci } 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci rcv_priv = netdev_priv(rcv); 36162306a36Sopenharmony_ci rxq = skb_get_queue_mapping(skb); 36262306a36Sopenharmony_ci if (rxq < rcv->real_num_rx_queues) { 36362306a36Sopenharmony_ci rq = &rcv_priv->rq[rxq]; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci /* The napi pointer is available when an XDP program is 36662306a36Sopenharmony_ci * attached or when GRO is enabled 36762306a36Sopenharmony_ci * Don't bother with napi/GRO if the skb can't be aggregated 36862306a36Sopenharmony_ci */ 36962306a36Sopenharmony_ci use_napi = rcu_access_pointer(rq->napi) && 37062306a36Sopenharmony_ci veth_skb_is_eligible_for_gro(dev, rcv, skb); 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci skb_tx_timestamp(skb); 37462306a36Sopenharmony_ci if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { 37562306a36Sopenharmony_ci if (!use_napi) 37662306a36Sopenharmony_ci dev_sw_netstats_tx_add(dev, 1, length); 37762306a36Sopenharmony_ci else 37862306a36Sopenharmony_ci __veth_xdp_flush(rq); 37962306a36Sopenharmony_ci } else { 38062306a36Sopenharmony_cidrop: 38162306a36Sopenharmony_ci atomic64_inc(&priv->dropped); 38262306a36Sopenharmony_ci ret = NET_XMIT_DROP; 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci rcu_read_unlock(); 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci return ret; 38862306a36Sopenharmony_ci} 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_cistatic void veth_stats_rx(struct veth_stats *result, struct net_device *dev) 39162306a36Sopenharmony_ci{ 39262306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 39362306a36Sopenharmony_ci int i; 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci result->peer_tq_xdp_xmit_err = 0; 39662306a36Sopenharmony_ci result->xdp_packets = 0; 39762306a36Sopenharmony_ci result->xdp_tx_err = 0; 39862306a36Sopenharmony_ci result->xdp_bytes = 0; 39962306a36Sopenharmony_ci result->rx_drops = 0; 40062306a36Sopenharmony_ci for (i = 0; i < dev->num_rx_queues; i++) { 40162306a36Sopenharmony_ci u64 packets, bytes, drops, xdp_tx_err, peer_tq_xdp_xmit_err; 40262306a36Sopenharmony_ci struct veth_rq_stats *stats = &priv->rq[i].stats; 40362306a36Sopenharmony_ci unsigned int start; 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci do { 40662306a36Sopenharmony_ci start = u64_stats_fetch_begin(&stats->syncp); 40762306a36Sopenharmony_ci peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err; 40862306a36Sopenharmony_ci xdp_tx_err = stats->vs.xdp_tx_err; 40962306a36Sopenharmony_ci packets = stats->vs.xdp_packets; 41062306a36Sopenharmony_ci bytes = stats->vs.xdp_bytes; 41162306a36Sopenharmony_ci drops = stats->vs.rx_drops; 41262306a36Sopenharmony_ci } while (u64_stats_fetch_retry(&stats->syncp, start)); 41362306a36Sopenharmony_ci result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err; 41462306a36Sopenharmony_ci result->xdp_tx_err += xdp_tx_err; 41562306a36Sopenharmony_ci result->xdp_packets += packets; 41662306a36Sopenharmony_ci result->xdp_bytes += bytes; 41762306a36Sopenharmony_ci result->rx_drops += drops; 41862306a36Sopenharmony_ci } 41962306a36Sopenharmony_ci} 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_cistatic void veth_get_stats64(struct net_device *dev, 42262306a36Sopenharmony_ci struct rtnl_link_stats64 *tot) 42362306a36Sopenharmony_ci{ 42462306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 42562306a36Sopenharmony_ci struct net_device *peer; 42662306a36Sopenharmony_ci struct veth_stats rx; 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci tot->tx_dropped = atomic64_read(&priv->dropped); 42962306a36Sopenharmony_ci dev_fetch_sw_netstats(tot, dev->tstats); 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci veth_stats_rx(&rx, dev); 43262306a36Sopenharmony_ci tot->tx_dropped += rx.xdp_tx_err; 43362306a36Sopenharmony_ci tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; 43462306a36Sopenharmony_ci tot->rx_bytes += rx.xdp_bytes; 43562306a36Sopenharmony_ci tot->rx_packets += rx.xdp_packets; 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci rcu_read_lock(); 43862306a36Sopenharmony_ci peer = rcu_dereference(priv->peer); 43962306a36Sopenharmony_ci if (peer) { 44062306a36Sopenharmony_ci struct rtnl_link_stats64 tot_peer = {}; 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci dev_fetch_sw_netstats(&tot_peer, peer->tstats); 44362306a36Sopenharmony_ci tot->rx_bytes += tot_peer.tx_bytes; 44462306a36Sopenharmony_ci tot->rx_packets += tot_peer.tx_packets; 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci veth_stats_rx(&rx, peer); 44762306a36Sopenharmony_ci tot->tx_dropped += rx.peer_tq_xdp_xmit_err; 44862306a36Sopenharmony_ci tot->rx_dropped += rx.xdp_tx_err; 44962306a36Sopenharmony_ci tot->tx_bytes += rx.xdp_bytes; 45062306a36Sopenharmony_ci tot->tx_packets += rx.xdp_packets; 45162306a36Sopenharmony_ci } 45262306a36Sopenharmony_ci rcu_read_unlock(); 45362306a36Sopenharmony_ci} 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci/* fake multicast ability */ 45662306a36Sopenharmony_cistatic void veth_set_multicast_list(struct net_device *dev) 45762306a36Sopenharmony_ci{ 45862306a36Sopenharmony_ci} 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_cistatic int veth_select_rxq(struct net_device *dev) 46162306a36Sopenharmony_ci{ 46262306a36Sopenharmony_ci return smp_processor_id() % dev->real_num_rx_queues; 46362306a36Sopenharmony_ci} 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_cistatic struct net_device *veth_peer_dev(struct net_device *dev) 46662306a36Sopenharmony_ci{ 46762306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci /* Callers must be under RCU read side. */ 47062306a36Sopenharmony_ci return rcu_dereference(priv->peer); 47162306a36Sopenharmony_ci} 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_cistatic int veth_xdp_xmit(struct net_device *dev, int n, 47462306a36Sopenharmony_ci struct xdp_frame **frames, 47562306a36Sopenharmony_ci u32 flags, bool ndo_xmit) 47662306a36Sopenharmony_ci{ 47762306a36Sopenharmony_ci struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 47862306a36Sopenharmony_ci int i, ret = -ENXIO, nxmit = 0; 47962306a36Sopenharmony_ci struct net_device *rcv; 48062306a36Sopenharmony_ci unsigned int max_len; 48162306a36Sopenharmony_ci struct veth_rq *rq; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 48462306a36Sopenharmony_ci return -EINVAL; 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci rcu_read_lock(); 48762306a36Sopenharmony_ci rcv = rcu_dereference(priv->peer); 48862306a36Sopenharmony_ci if (unlikely(!rcv)) 48962306a36Sopenharmony_ci goto out; 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci rcv_priv = netdev_priv(rcv); 49262306a36Sopenharmony_ci rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 49362306a36Sopenharmony_ci /* The napi pointer is set if NAPI is enabled, which ensures that 49462306a36Sopenharmony_ci * xdp_ring is initialized on receive side and the peer device is up. 49562306a36Sopenharmony_ci */ 49662306a36Sopenharmony_ci if (!rcu_access_pointer(rq->napi)) 49762306a36Sopenharmony_ci goto out; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN; 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci spin_lock(&rq->xdp_ring.producer_lock); 50262306a36Sopenharmony_ci for (i = 0; i < n; i++) { 50362306a36Sopenharmony_ci struct xdp_frame *frame = frames[i]; 50462306a36Sopenharmony_ci void *ptr = veth_xdp_to_ptr(frame); 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci if (unlikely(xdp_get_frame_len(frame) > max_len || 50762306a36Sopenharmony_ci __ptr_ring_produce(&rq->xdp_ring, ptr))) 50862306a36Sopenharmony_ci break; 50962306a36Sopenharmony_ci nxmit++; 51062306a36Sopenharmony_ci } 51162306a36Sopenharmony_ci spin_unlock(&rq->xdp_ring.producer_lock); 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci if (flags & XDP_XMIT_FLUSH) 51462306a36Sopenharmony_ci __veth_xdp_flush(rq); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci ret = nxmit; 51762306a36Sopenharmony_ci if (ndo_xmit) { 51862306a36Sopenharmony_ci u64_stats_update_begin(&rq->stats.syncp); 51962306a36Sopenharmony_ci rq->stats.vs.peer_tq_xdp_xmit += nxmit; 52062306a36Sopenharmony_ci rq->stats.vs.peer_tq_xdp_xmit_err += n - nxmit; 52162306a36Sopenharmony_ci u64_stats_update_end(&rq->stats.syncp); 52262306a36Sopenharmony_ci } 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ciout: 52562306a36Sopenharmony_ci rcu_read_unlock(); 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci return ret; 52862306a36Sopenharmony_ci} 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_cistatic int veth_ndo_xdp_xmit(struct net_device *dev, int n, 53162306a36Sopenharmony_ci struct xdp_frame **frames, u32 flags) 53262306a36Sopenharmony_ci{ 53362306a36Sopenharmony_ci int err; 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci err = veth_xdp_xmit(dev, n, frames, flags, true); 53662306a36Sopenharmony_ci if (err < 0) { 53762306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci atomic64_add(n, &priv->dropped); 54062306a36Sopenharmony_ci } 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci return err; 54362306a36Sopenharmony_ci} 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_cistatic void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 54662306a36Sopenharmony_ci{ 54762306a36Sopenharmony_ci int sent, i, err = 0, drops; 54862306a36Sopenharmony_ci 54962306a36Sopenharmony_ci sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false); 55062306a36Sopenharmony_ci if (sent < 0) { 55162306a36Sopenharmony_ci err = sent; 55262306a36Sopenharmony_ci sent = 0; 55362306a36Sopenharmony_ci } 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci for (i = sent; unlikely(i < bq->count); i++) 55662306a36Sopenharmony_ci xdp_return_frame(bq->q[i]); 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci drops = bq->count - sent; 55962306a36Sopenharmony_ci trace_xdp_bulk_tx(rq->dev, sent, drops, err); 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci u64_stats_update_begin(&rq->stats.syncp); 56262306a36Sopenharmony_ci rq->stats.vs.xdp_tx += sent; 56362306a36Sopenharmony_ci rq->stats.vs.xdp_tx_err += drops; 56462306a36Sopenharmony_ci u64_stats_update_end(&rq->stats.syncp); 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci bq->count = 0; 56762306a36Sopenharmony_ci} 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_cistatic void veth_xdp_flush(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 57062306a36Sopenharmony_ci{ 57162306a36Sopenharmony_ci struct veth_priv *rcv_priv, *priv = netdev_priv(rq->dev); 57262306a36Sopenharmony_ci struct net_device *rcv; 57362306a36Sopenharmony_ci struct veth_rq *rcv_rq; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci rcu_read_lock(); 57662306a36Sopenharmony_ci veth_xdp_flush_bq(rq, bq); 57762306a36Sopenharmony_ci rcv = rcu_dereference(priv->peer); 57862306a36Sopenharmony_ci if (unlikely(!rcv)) 57962306a36Sopenharmony_ci goto out; 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci rcv_priv = netdev_priv(rcv); 58262306a36Sopenharmony_ci rcv_rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 58362306a36Sopenharmony_ci /* xdp_ring is initialized on receive side? */ 58462306a36Sopenharmony_ci if (unlikely(!rcu_access_pointer(rcv_rq->xdp_prog))) 58562306a36Sopenharmony_ci goto out; 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci __veth_xdp_flush(rcv_rq); 58862306a36Sopenharmony_ciout: 58962306a36Sopenharmony_ci rcu_read_unlock(); 59062306a36Sopenharmony_ci} 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_cistatic int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp, 59362306a36Sopenharmony_ci struct veth_xdp_tx_bq *bq) 59462306a36Sopenharmony_ci{ 59562306a36Sopenharmony_ci struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp); 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci if (unlikely(!frame)) 59862306a36Sopenharmony_ci return -EOVERFLOW; 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE)) 60162306a36Sopenharmony_ci veth_xdp_flush_bq(rq, bq); 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci bq->q[bq->count++] = frame; 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci return 0; 60662306a36Sopenharmony_ci} 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_cistatic struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq, 60962306a36Sopenharmony_ci struct xdp_frame *frame, 61062306a36Sopenharmony_ci struct veth_xdp_tx_bq *bq, 61162306a36Sopenharmony_ci struct veth_stats *stats) 61262306a36Sopenharmony_ci{ 61362306a36Sopenharmony_ci struct xdp_frame orig_frame; 61462306a36Sopenharmony_ci struct bpf_prog *xdp_prog; 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_ci rcu_read_lock(); 61762306a36Sopenharmony_ci xdp_prog = rcu_dereference(rq->xdp_prog); 61862306a36Sopenharmony_ci if (likely(xdp_prog)) { 61962306a36Sopenharmony_ci struct veth_xdp_buff vxbuf; 62062306a36Sopenharmony_ci struct xdp_buff *xdp = &vxbuf.xdp; 62162306a36Sopenharmony_ci u32 act; 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci xdp_convert_frame_to_buff(frame, xdp); 62462306a36Sopenharmony_ci xdp->rxq = &rq->xdp_rxq; 62562306a36Sopenharmony_ci vxbuf.skb = NULL; 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci act = bpf_prog_run_xdp(xdp_prog, xdp); 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci switch (act) { 63062306a36Sopenharmony_ci case XDP_PASS: 63162306a36Sopenharmony_ci if (xdp_update_frame_from_buff(xdp, frame)) 63262306a36Sopenharmony_ci goto err_xdp; 63362306a36Sopenharmony_ci break; 63462306a36Sopenharmony_ci case XDP_TX: 63562306a36Sopenharmony_ci orig_frame = *frame; 63662306a36Sopenharmony_ci xdp->rxq->mem = frame->mem; 63762306a36Sopenharmony_ci if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) { 63862306a36Sopenharmony_ci trace_xdp_exception(rq->dev, xdp_prog, act); 63962306a36Sopenharmony_ci frame = &orig_frame; 64062306a36Sopenharmony_ci stats->rx_drops++; 64162306a36Sopenharmony_ci goto err_xdp; 64262306a36Sopenharmony_ci } 64362306a36Sopenharmony_ci stats->xdp_tx++; 64462306a36Sopenharmony_ci rcu_read_unlock(); 64562306a36Sopenharmony_ci goto xdp_xmit; 64662306a36Sopenharmony_ci case XDP_REDIRECT: 64762306a36Sopenharmony_ci orig_frame = *frame; 64862306a36Sopenharmony_ci xdp->rxq->mem = frame->mem; 64962306a36Sopenharmony_ci if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { 65062306a36Sopenharmony_ci frame = &orig_frame; 65162306a36Sopenharmony_ci stats->rx_drops++; 65262306a36Sopenharmony_ci goto err_xdp; 65362306a36Sopenharmony_ci } 65462306a36Sopenharmony_ci stats->xdp_redirect++; 65562306a36Sopenharmony_ci rcu_read_unlock(); 65662306a36Sopenharmony_ci goto xdp_xmit; 65762306a36Sopenharmony_ci default: 65862306a36Sopenharmony_ci bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act); 65962306a36Sopenharmony_ci fallthrough; 66062306a36Sopenharmony_ci case XDP_ABORTED: 66162306a36Sopenharmony_ci trace_xdp_exception(rq->dev, xdp_prog, act); 66262306a36Sopenharmony_ci fallthrough; 66362306a36Sopenharmony_ci case XDP_DROP: 66462306a36Sopenharmony_ci stats->xdp_drops++; 66562306a36Sopenharmony_ci goto err_xdp; 66662306a36Sopenharmony_ci } 66762306a36Sopenharmony_ci } 66862306a36Sopenharmony_ci rcu_read_unlock(); 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci return frame; 67162306a36Sopenharmony_cierr_xdp: 67262306a36Sopenharmony_ci rcu_read_unlock(); 67362306a36Sopenharmony_ci xdp_return_frame(frame); 67462306a36Sopenharmony_cixdp_xmit: 67562306a36Sopenharmony_ci return NULL; 67662306a36Sopenharmony_ci} 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci/* frames array contains VETH_XDP_BATCH at most */ 67962306a36Sopenharmony_cistatic void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames, 68062306a36Sopenharmony_ci int n_xdpf, struct veth_xdp_tx_bq *bq, 68162306a36Sopenharmony_ci struct veth_stats *stats) 68262306a36Sopenharmony_ci{ 68362306a36Sopenharmony_ci void *skbs[VETH_XDP_BATCH]; 68462306a36Sopenharmony_ci int i; 68562306a36Sopenharmony_ci 68662306a36Sopenharmony_ci if (xdp_alloc_skb_bulk(skbs, n_xdpf, 68762306a36Sopenharmony_ci GFP_ATOMIC | __GFP_ZERO) < 0) { 68862306a36Sopenharmony_ci for (i = 0; i < n_xdpf; i++) 68962306a36Sopenharmony_ci xdp_return_frame(frames[i]); 69062306a36Sopenharmony_ci stats->rx_drops += n_xdpf; 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ci return; 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci for (i = 0; i < n_xdpf; i++) { 69662306a36Sopenharmony_ci struct sk_buff *skb = skbs[i]; 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci skb = __xdp_build_skb_from_frame(frames[i], skb, 69962306a36Sopenharmony_ci rq->dev); 70062306a36Sopenharmony_ci if (!skb) { 70162306a36Sopenharmony_ci xdp_return_frame(frames[i]); 70262306a36Sopenharmony_ci stats->rx_drops++; 70362306a36Sopenharmony_ci continue; 70462306a36Sopenharmony_ci } 70562306a36Sopenharmony_ci napi_gro_receive(&rq->xdp_napi, skb); 70662306a36Sopenharmony_ci } 70762306a36Sopenharmony_ci} 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_cistatic void veth_xdp_get(struct xdp_buff *xdp) 71062306a36Sopenharmony_ci{ 71162306a36Sopenharmony_ci struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 71262306a36Sopenharmony_ci int i; 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci get_page(virt_to_page(xdp->data)); 71562306a36Sopenharmony_ci if (likely(!xdp_buff_has_frags(xdp))) 71662306a36Sopenharmony_ci return; 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci for (i = 0; i < sinfo->nr_frags; i++) 71962306a36Sopenharmony_ci __skb_frag_ref(&sinfo->frags[i]); 72062306a36Sopenharmony_ci} 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_cistatic int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, 72362306a36Sopenharmony_ci struct xdp_buff *xdp, 72462306a36Sopenharmony_ci struct sk_buff **pskb) 72562306a36Sopenharmony_ci{ 72662306a36Sopenharmony_ci struct sk_buff *skb = *pskb; 72762306a36Sopenharmony_ci u32 frame_sz; 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci if (skb_shared(skb) || skb_head_is_locked(skb) || 73062306a36Sopenharmony_ci skb_shinfo(skb)->nr_frags || 73162306a36Sopenharmony_ci skb_headroom(skb) < XDP_PACKET_HEADROOM) { 73262306a36Sopenharmony_ci u32 size, len, max_head_size, off; 73362306a36Sopenharmony_ci struct sk_buff *nskb; 73462306a36Sopenharmony_ci struct page *page; 73562306a36Sopenharmony_ci int i, head_off; 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci /* We need a private copy of the skb and data buffers since 73862306a36Sopenharmony_ci * the ebpf program can modify it. We segment the original skb 73962306a36Sopenharmony_ci * into order-0 pages without linearize it. 74062306a36Sopenharmony_ci * 74162306a36Sopenharmony_ci * Make sure we have enough space for linear and paged area 74262306a36Sopenharmony_ci */ 74362306a36Sopenharmony_ci max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - 74462306a36Sopenharmony_ci VETH_XDP_HEADROOM); 74562306a36Sopenharmony_ci if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size) 74662306a36Sopenharmony_ci goto drop; 74762306a36Sopenharmony_ci 74862306a36Sopenharmony_ci /* Allocate skb head */ 74962306a36Sopenharmony_ci page = page_pool_dev_alloc_pages(rq->page_pool); 75062306a36Sopenharmony_ci if (!page) 75162306a36Sopenharmony_ci goto drop; 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci nskb = napi_build_skb(page_address(page), PAGE_SIZE); 75462306a36Sopenharmony_ci if (!nskb) { 75562306a36Sopenharmony_ci page_pool_put_full_page(rq->page_pool, page, true); 75662306a36Sopenharmony_ci goto drop; 75762306a36Sopenharmony_ci } 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci skb_reserve(nskb, VETH_XDP_HEADROOM); 76062306a36Sopenharmony_ci skb_copy_header(nskb, skb); 76162306a36Sopenharmony_ci skb_mark_for_recycle(nskb); 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci size = min_t(u32, skb->len, max_head_size); 76462306a36Sopenharmony_ci if (skb_copy_bits(skb, 0, nskb->data, size)) { 76562306a36Sopenharmony_ci consume_skb(nskb); 76662306a36Sopenharmony_ci goto drop; 76762306a36Sopenharmony_ci } 76862306a36Sopenharmony_ci skb_put(nskb, size); 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci head_off = skb_headroom(nskb) - skb_headroom(skb); 77162306a36Sopenharmony_ci skb_headers_offset_update(nskb, head_off); 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci /* Allocate paged area of new skb */ 77462306a36Sopenharmony_ci off = size; 77562306a36Sopenharmony_ci len = skb->len - off; 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) { 77862306a36Sopenharmony_ci page = page_pool_dev_alloc_pages(rq->page_pool); 77962306a36Sopenharmony_ci if (!page) { 78062306a36Sopenharmony_ci consume_skb(nskb); 78162306a36Sopenharmony_ci goto drop; 78262306a36Sopenharmony_ci } 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci size = min_t(u32, len, PAGE_SIZE); 78562306a36Sopenharmony_ci skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE); 78662306a36Sopenharmony_ci if (skb_copy_bits(skb, off, page_address(page), 78762306a36Sopenharmony_ci size)) { 78862306a36Sopenharmony_ci consume_skb(nskb); 78962306a36Sopenharmony_ci goto drop; 79062306a36Sopenharmony_ci } 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci len -= size; 79362306a36Sopenharmony_ci off += size; 79462306a36Sopenharmony_ci } 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci consume_skb(skb); 79762306a36Sopenharmony_ci skb = nskb; 79862306a36Sopenharmony_ci } 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci /* SKB "head" area always have tailroom for skb_shared_info */ 80162306a36Sopenharmony_ci frame_sz = skb_end_pointer(skb) - skb->head; 80262306a36Sopenharmony_ci frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 80362306a36Sopenharmony_ci xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 80462306a36Sopenharmony_ci xdp_prepare_buff(xdp, skb->head, skb_headroom(skb), 80562306a36Sopenharmony_ci skb_headlen(skb), true); 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci if (skb_is_nonlinear(skb)) { 80862306a36Sopenharmony_ci skb_shinfo(skb)->xdp_frags_size = skb->data_len; 80962306a36Sopenharmony_ci xdp_buff_set_frags_flag(xdp); 81062306a36Sopenharmony_ci } else { 81162306a36Sopenharmony_ci xdp_buff_clear_frags_flag(xdp); 81262306a36Sopenharmony_ci } 81362306a36Sopenharmony_ci *pskb = skb; 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci return 0; 81662306a36Sopenharmony_cidrop: 81762306a36Sopenharmony_ci consume_skb(skb); 81862306a36Sopenharmony_ci *pskb = NULL; 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci return -ENOMEM; 82162306a36Sopenharmony_ci} 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_cistatic struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, 82462306a36Sopenharmony_ci struct sk_buff *skb, 82562306a36Sopenharmony_ci struct veth_xdp_tx_bq *bq, 82662306a36Sopenharmony_ci struct veth_stats *stats) 82762306a36Sopenharmony_ci{ 82862306a36Sopenharmony_ci void *orig_data, *orig_data_end; 82962306a36Sopenharmony_ci struct bpf_prog *xdp_prog; 83062306a36Sopenharmony_ci struct veth_xdp_buff vxbuf; 83162306a36Sopenharmony_ci struct xdp_buff *xdp = &vxbuf.xdp; 83262306a36Sopenharmony_ci u32 act, metalen; 83362306a36Sopenharmony_ci int off; 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_ci skb_prepare_for_gro(skb); 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci rcu_read_lock(); 83862306a36Sopenharmony_ci xdp_prog = rcu_dereference(rq->xdp_prog); 83962306a36Sopenharmony_ci if (unlikely(!xdp_prog)) { 84062306a36Sopenharmony_ci rcu_read_unlock(); 84162306a36Sopenharmony_ci goto out; 84262306a36Sopenharmony_ci } 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_ci __skb_push(skb, skb->data - skb_mac_header(skb)); 84562306a36Sopenharmony_ci if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb)) 84662306a36Sopenharmony_ci goto drop; 84762306a36Sopenharmony_ci vxbuf.skb = skb; 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ci orig_data = xdp->data; 85062306a36Sopenharmony_ci orig_data_end = xdp->data_end; 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci act = bpf_prog_run_xdp(xdp_prog, xdp); 85362306a36Sopenharmony_ci 85462306a36Sopenharmony_ci switch (act) { 85562306a36Sopenharmony_ci case XDP_PASS: 85662306a36Sopenharmony_ci break; 85762306a36Sopenharmony_ci case XDP_TX: 85862306a36Sopenharmony_ci veth_xdp_get(xdp); 85962306a36Sopenharmony_ci consume_skb(skb); 86062306a36Sopenharmony_ci xdp->rxq->mem = rq->xdp_mem; 86162306a36Sopenharmony_ci if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) { 86262306a36Sopenharmony_ci trace_xdp_exception(rq->dev, xdp_prog, act); 86362306a36Sopenharmony_ci stats->rx_drops++; 86462306a36Sopenharmony_ci goto err_xdp; 86562306a36Sopenharmony_ci } 86662306a36Sopenharmony_ci stats->xdp_tx++; 86762306a36Sopenharmony_ci rcu_read_unlock(); 86862306a36Sopenharmony_ci goto xdp_xmit; 86962306a36Sopenharmony_ci case XDP_REDIRECT: 87062306a36Sopenharmony_ci veth_xdp_get(xdp); 87162306a36Sopenharmony_ci consume_skb(skb); 87262306a36Sopenharmony_ci xdp->rxq->mem = rq->xdp_mem; 87362306a36Sopenharmony_ci if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { 87462306a36Sopenharmony_ci stats->rx_drops++; 87562306a36Sopenharmony_ci goto err_xdp; 87662306a36Sopenharmony_ci } 87762306a36Sopenharmony_ci stats->xdp_redirect++; 87862306a36Sopenharmony_ci rcu_read_unlock(); 87962306a36Sopenharmony_ci goto xdp_xmit; 88062306a36Sopenharmony_ci default: 88162306a36Sopenharmony_ci bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act); 88262306a36Sopenharmony_ci fallthrough; 88362306a36Sopenharmony_ci case XDP_ABORTED: 88462306a36Sopenharmony_ci trace_xdp_exception(rq->dev, xdp_prog, act); 88562306a36Sopenharmony_ci fallthrough; 88662306a36Sopenharmony_ci case XDP_DROP: 88762306a36Sopenharmony_ci stats->xdp_drops++; 88862306a36Sopenharmony_ci goto xdp_drop; 88962306a36Sopenharmony_ci } 89062306a36Sopenharmony_ci rcu_read_unlock(); 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_ci /* check if bpf_xdp_adjust_head was used */ 89362306a36Sopenharmony_ci off = orig_data - xdp->data; 89462306a36Sopenharmony_ci if (off > 0) 89562306a36Sopenharmony_ci __skb_push(skb, off); 89662306a36Sopenharmony_ci else if (off < 0) 89762306a36Sopenharmony_ci __skb_pull(skb, -off); 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci skb_reset_mac_header(skb); 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_ci /* check if bpf_xdp_adjust_tail was used */ 90262306a36Sopenharmony_ci off = xdp->data_end - orig_data_end; 90362306a36Sopenharmony_ci if (off != 0) 90462306a36Sopenharmony_ci __skb_put(skb, off); /* positive on grow, negative on shrink */ 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers 90762306a36Sopenharmony_ci * (e.g. bpf_xdp_adjust_tail), we need to update data_len here. 90862306a36Sopenharmony_ci */ 90962306a36Sopenharmony_ci if (xdp_buff_has_frags(xdp)) 91062306a36Sopenharmony_ci skb->data_len = skb_shinfo(skb)->xdp_frags_size; 91162306a36Sopenharmony_ci else 91262306a36Sopenharmony_ci skb->data_len = 0; 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_ci skb->protocol = eth_type_trans(skb, rq->dev); 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci metalen = xdp->data - xdp->data_meta; 91762306a36Sopenharmony_ci if (metalen) 91862306a36Sopenharmony_ci skb_metadata_set(skb, metalen); 91962306a36Sopenharmony_ciout: 92062306a36Sopenharmony_ci return skb; 92162306a36Sopenharmony_cidrop: 92262306a36Sopenharmony_ci stats->rx_drops++; 92362306a36Sopenharmony_cixdp_drop: 92462306a36Sopenharmony_ci rcu_read_unlock(); 92562306a36Sopenharmony_ci kfree_skb(skb); 92662306a36Sopenharmony_ci return NULL; 92762306a36Sopenharmony_cierr_xdp: 92862306a36Sopenharmony_ci rcu_read_unlock(); 92962306a36Sopenharmony_ci xdp_return_buff(xdp); 93062306a36Sopenharmony_cixdp_xmit: 93162306a36Sopenharmony_ci return NULL; 93262306a36Sopenharmony_ci} 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_cistatic int veth_xdp_rcv(struct veth_rq *rq, int budget, 93562306a36Sopenharmony_ci struct veth_xdp_tx_bq *bq, 93662306a36Sopenharmony_ci struct veth_stats *stats) 93762306a36Sopenharmony_ci{ 93862306a36Sopenharmony_ci int i, done = 0, n_xdpf = 0; 93962306a36Sopenharmony_ci void *xdpf[VETH_XDP_BATCH]; 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_ci for (i = 0; i < budget; i++) { 94262306a36Sopenharmony_ci void *ptr = __ptr_ring_consume(&rq->xdp_ring); 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_ci if (!ptr) 94562306a36Sopenharmony_ci break; 94662306a36Sopenharmony_ci 94762306a36Sopenharmony_ci if (veth_is_xdp_frame(ptr)) { 94862306a36Sopenharmony_ci /* ndo_xdp_xmit */ 94962306a36Sopenharmony_ci struct xdp_frame *frame = veth_ptr_to_xdp(ptr); 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci stats->xdp_bytes += xdp_get_frame_len(frame); 95262306a36Sopenharmony_ci frame = veth_xdp_rcv_one(rq, frame, bq, stats); 95362306a36Sopenharmony_ci if (frame) { 95462306a36Sopenharmony_ci /* XDP_PASS */ 95562306a36Sopenharmony_ci xdpf[n_xdpf++] = frame; 95662306a36Sopenharmony_ci if (n_xdpf == VETH_XDP_BATCH) { 95762306a36Sopenharmony_ci veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf, 95862306a36Sopenharmony_ci bq, stats); 95962306a36Sopenharmony_ci n_xdpf = 0; 96062306a36Sopenharmony_ci } 96162306a36Sopenharmony_ci } 96262306a36Sopenharmony_ci } else { 96362306a36Sopenharmony_ci /* ndo_start_xmit */ 96462306a36Sopenharmony_ci struct sk_buff *skb = ptr; 96562306a36Sopenharmony_ci 96662306a36Sopenharmony_ci stats->xdp_bytes += skb->len; 96762306a36Sopenharmony_ci skb = veth_xdp_rcv_skb(rq, skb, bq, stats); 96862306a36Sopenharmony_ci if (skb) { 96962306a36Sopenharmony_ci if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC)) 97062306a36Sopenharmony_ci netif_receive_skb(skb); 97162306a36Sopenharmony_ci else 97262306a36Sopenharmony_ci napi_gro_receive(&rq->xdp_napi, skb); 97362306a36Sopenharmony_ci } 97462306a36Sopenharmony_ci } 97562306a36Sopenharmony_ci done++; 97662306a36Sopenharmony_ci } 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_ci if (n_xdpf) 97962306a36Sopenharmony_ci veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf, bq, stats); 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_ci u64_stats_update_begin(&rq->stats.syncp); 98262306a36Sopenharmony_ci rq->stats.vs.xdp_redirect += stats->xdp_redirect; 98362306a36Sopenharmony_ci rq->stats.vs.xdp_bytes += stats->xdp_bytes; 98462306a36Sopenharmony_ci rq->stats.vs.xdp_drops += stats->xdp_drops; 98562306a36Sopenharmony_ci rq->stats.vs.rx_drops += stats->rx_drops; 98662306a36Sopenharmony_ci rq->stats.vs.xdp_packets += done; 98762306a36Sopenharmony_ci u64_stats_update_end(&rq->stats.syncp); 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_ci return done; 99062306a36Sopenharmony_ci} 99162306a36Sopenharmony_ci 99262306a36Sopenharmony_cistatic int veth_poll(struct napi_struct *napi, int budget) 99362306a36Sopenharmony_ci{ 99462306a36Sopenharmony_ci struct veth_rq *rq = 99562306a36Sopenharmony_ci container_of(napi, struct veth_rq, xdp_napi); 99662306a36Sopenharmony_ci struct veth_stats stats = {}; 99762306a36Sopenharmony_ci struct veth_xdp_tx_bq bq; 99862306a36Sopenharmony_ci int done; 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_ci bq.count = 0; 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci xdp_set_return_frame_no_direct(); 100362306a36Sopenharmony_ci done = veth_xdp_rcv(rq, budget, &bq, &stats); 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci if (stats.xdp_redirect > 0) 100662306a36Sopenharmony_ci xdp_do_flush(); 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_ci if (done < budget && napi_complete_done(napi, done)) { 100962306a36Sopenharmony_ci /* Write rx_notify_masked before reading ptr_ring */ 101062306a36Sopenharmony_ci smp_store_mb(rq->rx_notify_masked, false); 101162306a36Sopenharmony_ci if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { 101262306a36Sopenharmony_ci if (napi_schedule_prep(&rq->xdp_napi)) { 101362306a36Sopenharmony_ci WRITE_ONCE(rq->rx_notify_masked, true); 101462306a36Sopenharmony_ci __napi_schedule(&rq->xdp_napi); 101562306a36Sopenharmony_ci } 101662306a36Sopenharmony_ci } 101762306a36Sopenharmony_ci } 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci if (stats.xdp_tx > 0) 102062306a36Sopenharmony_ci veth_xdp_flush(rq, &bq); 102162306a36Sopenharmony_ci xdp_clear_return_frame_no_direct(); 102262306a36Sopenharmony_ci 102362306a36Sopenharmony_ci return done; 102462306a36Sopenharmony_ci} 102562306a36Sopenharmony_ci 102662306a36Sopenharmony_cistatic int veth_create_page_pool(struct veth_rq *rq) 102762306a36Sopenharmony_ci{ 102862306a36Sopenharmony_ci struct page_pool_params pp_params = { 102962306a36Sopenharmony_ci .order = 0, 103062306a36Sopenharmony_ci .pool_size = VETH_RING_SIZE, 103162306a36Sopenharmony_ci .nid = NUMA_NO_NODE, 103262306a36Sopenharmony_ci .dev = &rq->dev->dev, 103362306a36Sopenharmony_ci }; 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ci rq->page_pool = page_pool_create(&pp_params); 103662306a36Sopenharmony_ci if (IS_ERR(rq->page_pool)) { 103762306a36Sopenharmony_ci int err = PTR_ERR(rq->page_pool); 103862306a36Sopenharmony_ci 103962306a36Sopenharmony_ci rq->page_pool = NULL; 104062306a36Sopenharmony_ci return err; 104162306a36Sopenharmony_ci } 104262306a36Sopenharmony_ci 104362306a36Sopenharmony_ci return 0; 104462306a36Sopenharmony_ci} 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_cistatic int __veth_napi_enable_range(struct net_device *dev, int start, int end) 104762306a36Sopenharmony_ci{ 104862306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 104962306a36Sopenharmony_ci int err, i; 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci for (i = start; i < end; i++) { 105262306a36Sopenharmony_ci err = veth_create_page_pool(&priv->rq[i]); 105362306a36Sopenharmony_ci if (err) 105462306a36Sopenharmony_ci goto err_page_pool; 105562306a36Sopenharmony_ci } 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_ci for (i = start; i < end; i++) { 105862306a36Sopenharmony_ci struct veth_rq *rq = &priv->rq[i]; 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL); 106162306a36Sopenharmony_ci if (err) 106262306a36Sopenharmony_ci goto err_xdp_ring; 106362306a36Sopenharmony_ci } 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_ci for (i = start; i < end; i++) { 106662306a36Sopenharmony_ci struct veth_rq *rq = &priv->rq[i]; 106762306a36Sopenharmony_ci 106862306a36Sopenharmony_ci napi_enable(&rq->xdp_napi); 106962306a36Sopenharmony_ci rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi); 107062306a36Sopenharmony_ci } 107162306a36Sopenharmony_ci 107262306a36Sopenharmony_ci return 0; 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_cierr_xdp_ring: 107562306a36Sopenharmony_ci for (i--; i >= start; i--) 107662306a36Sopenharmony_ci ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free); 107762306a36Sopenharmony_ci i = end; 107862306a36Sopenharmony_cierr_page_pool: 107962306a36Sopenharmony_ci for (i--; i >= start; i--) { 108062306a36Sopenharmony_ci page_pool_destroy(priv->rq[i].page_pool); 108162306a36Sopenharmony_ci priv->rq[i].page_pool = NULL; 108262306a36Sopenharmony_ci } 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_ci return err; 108562306a36Sopenharmony_ci} 108662306a36Sopenharmony_ci 108762306a36Sopenharmony_cistatic int __veth_napi_enable(struct net_device *dev) 108862306a36Sopenharmony_ci{ 108962306a36Sopenharmony_ci return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); 109062306a36Sopenharmony_ci} 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_cistatic void veth_napi_del_range(struct net_device *dev, int start, int end) 109362306a36Sopenharmony_ci{ 109462306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 109562306a36Sopenharmony_ci int i; 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci for (i = start; i < end; i++) { 109862306a36Sopenharmony_ci struct veth_rq *rq = &priv->rq[i]; 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_ci rcu_assign_pointer(priv->rq[i].napi, NULL); 110162306a36Sopenharmony_ci napi_disable(&rq->xdp_napi); 110262306a36Sopenharmony_ci __netif_napi_del(&rq->xdp_napi); 110362306a36Sopenharmony_ci } 110462306a36Sopenharmony_ci synchronize_net(); 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci for (i = start; i < end; i++) { 110762306a36Sopenharmony_ci struct veth_rq *rq = &priv->rq[i]; 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci rq->rx_notify_masked = false; 111062306a36Sopenharmony_ci ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free); 111162306a36Sopenharmony_ci } 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci for (i = start; i < end; i++) { 111462306a36Sopenharmony_ci page_pool_destroy(priv->rq[i].page_pool); 111562306a36Sopenharmony_ci priv->rq[i].page_pool = NULL; 111662306a36Sopenharmony_ci } 111762306a36Sopenharmony_ci} 111862306a36Sopenharmony_ci 111962306a36Sopenharmony_cistatic void veth_napi_del(struct net_device *dev) 112062306a36Sopenharmony_ci{ 112162306a36Sopenharmony_ci veth_napi_del_range(dev, 0, dev->real_num_rx_queues); 112262306a36Sopenharmony_ci} 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_cistatic bool veth_gro_requested(const struct net_device *dev) 112562306a36Sopenharmony_ci{ 112662306a36Sopenharmony_ci return !!(dev->wanted_features & NETIF_F_GRO); 112762306a36Sopenharmony_ci} 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_cistatic int veth_enable_xdp_range(struct net_device *dev, int start, int end, 113062306a36Sopenharmony_ci bool napi_already_on) 113162306a36Sopenharmony_ci{ 113262306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 113362306a36Sopenharmony_ci int err, i; 113462306a36Sopenharmony_ci 113562306a36Sopenharmony_ci for (i = start; i < end; i++) { 113662306a36Sopenharmony_ci struct veth_rq *rq = &priv->rq[i]; 113762306a36Sopenharmony_ci 113862306a36Sopenharmony_ci if (!napi_already_on) 113962306a36Sopenharmony_ci netif_napi_add(dev, &rq->xdp_napi, veth_poll); 114062306a36Sopenharmony_ci err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id); 114162306a36Sopenharmony_ci if (err < 0) 114262306a36Sopenharmony_ci goto err_rxq_reg; 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ci err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 114562306a36Sopenharmony_ci MEM_TYPE_PAGE_SHARED, 114662306a36Sopenharmony_ci NULL); 114762306a36Sopenharmony_ci if (err < 0) 114862306a36Sopenharmony_ci goto err_reg_mem; 114962306a36Sopenharmony_ci 115062306a36Sopenharmony_ci /* Save original mem info as it can be overwritten */ 115162306a36Sopenharmony_ci rq->xdp_mem = rq->xdp_rxq.mem; 115262306a36Sopenharmony_ci } 115362306a36Sopenharmony_ci return 0; 115462306a36Sopenharmony_ci 115562306a36Sopenharmony_cierr_reg_mem: 115662306a36Sopenharmony_ci xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); 115762306a36Sopenharmony_cierr_rxq_reg: 115862306a36Sopenharmony_ci for (i--; i >= start; i--) { 115962306a36Sopenharmony_ci struct veth_rq *rq = &priv->rq[i]; 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci xdp_rxq_info_unreg(&rq->xdp_rxq); 116262306a36Sopenharmony_ci if (!napi_already_on) 116362306a36Sopenharmony_ci netif_napi_del(&rq->xdp_napi); 116462306a36Sopenharmony_ci } 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci return err; 116762306a36Sopenharmony_ci} 116862306a36Sopenharmony_ci 116962306a36Sopenharmony_cistatic void veth_disable_xdp_range(struct net_device *dev, int start, int end, 117062306a36Sopenharmony_ci bool delete_napi) 117162306a36Sopenharmony_ci{ 117262306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 117362306a36Sopenharmony_ci int i; 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_ci for (i = start; i < end; i++) { 117662306a36Sopenharmony_ci struct veth_rq *rq = &priv->rq[i]; 117762306a36Sopenharmony_ci 117862306a36Sopenharmony_ci rq->xdp_rxq.mem = rq->xdp_mem; 117962306a36Sopenharmony_ci xdp_rxq_info_unreg(&rq->xdp_rxq); 118062306a36Sopenharmony_ci 118162306a36Sopenharmony_ci if (delete_napi) 118262306a36Sopenharmony_ci netif_napi_del(&rq->xdp_napi); 118362306a36Sopenharmony_ci } 118462306a36Sopenharmony_ci} 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_cistatic int veth_enable_xdp(struct net_device *dev) 118762306a36Sopenharmony_ci{ 118862306a36Sopenharmony_ci bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP); 118962306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 119062306a36Sopenharmony_ci int err, i; 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { 119362306a36Sopenharmony_ci err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on); 119462306a36Sopenharmony_ci if (err) 119562306a36Sopenharmony_ci return err; 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci if (!napi_already_on) { 119862306a36Sopenharmony_ci err = __veth_napi_enable(dev); 119962306a36Sopenharmony_ci if (err) { 120062306a36Sopenharmony_ci veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); 120162306a36Sopenharmony_ci return err; 120262306a36Sopenharmony_ci } 120362306a36Sopenharmony_ci } 120462306a36Sopenharmony_ci } 120562306a36Sopenharmony_ci 120662306a36Sopenharmony_ci for (i = 0; i < dev->real_num_rx_queues; i++) { 120762306a36Sopenharmony_ci rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog); 120862306a36Sopenharmony_ci rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi); 120962306a36Sopenharmony_ci } 121062306a36Sopenharmony_ci 121162306a36Sopenharmony_ci return 0; 121262306a36Sopenharmony_ci} 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_cistatic void veth_disable_xdp(struct net_device *dev) 121562306a36Sopenharmony_ci{ 121662306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 121762306a36Sopenharmony_ci int i; 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci for (i = 0; i < dev->real_num_rx_queues; i++) 122062306a36Sopenharmony_ci rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci if (!netif_running(dev) || !veth_gro_requested(dev)) 122362306a36Sopenharmony_ci veth_napi_del(dev); 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); 122662306a36Sopenharmony_ci} 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_cistatic int veth_napi_enable_range(struct net_device *dev, int start, int end) 122962306a36Sopenharmony_ci{ 123062306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 123162306a36Sopenharmony_ci int err, i; 123262306a36Sopenharmony_ci 123362306a36Sopenharmony_ci for (i = start; i < end; i++) { 123462306a36Sopenharmony_ci struct veth_rq *rq = &priv->rq[i]; 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_ci netif_napi_add(dev, &rq->xdp_napi, veth_poll); 123762306a36Sopenharmony_ci } 123862306a36Sopenharmony_ci 123962306a36Sopenharmony_ci err = __veth_napi_enable_range(dev, start, end); 124062306a36Sopenharmony_ci if (err) { 124162306a36Sopenharmony_ci for (i = start; i < end; i++) { 124262306a36Sopenharmony_ci struct veth_rq *rq = &priv->rq[i]; 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci netif_napi_del(&rq->xdp_napi); 124562306a36Sopenharmony_ci } 124662306a36Sopenharmony_ci return err; 124762306a36Sopenharmony_ci } 124862306a36Sopenharmony_ci return err; 124962306a36Sopenharmony_ci} 125062306a36Sopenharmony_ci 125162306a36Sopenharmony_cistatic int veth_napi_enable(struct net_device *dev) 125262306a36Sopenharmony_ci{ 125362306a36Sopenharmony_ci return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); 125462306a36Sopenharmony_ci} 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_cistatic void veth_disable_range_safe(struct net_device *dev, int start, int end) 125762306a36Sopenharmony_ci{ 125862306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci if (start >= end) 126162306a36Sopenharmony_ci return; 126262306a36Sopenharmony_ci 126362306a36Sopenharmony_ci if (priv->_xdp_prog) { 126462306a36Sopenharmony_ci veth_napi_del_range(dev, start, end); 126562306a36Sopenharmony_ci veth_disable_xdp_range(dev, start, end, false); 126662306a36Sopenharmony_ci } else if (veth_gro_requested(dev)) { 126762306a36Sopenharmony_ci veth_napi_del_range(dev, start, end); 126862306a36Sopenharmony_ci } 126962306a36Sopenharmony_ci} 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_cistatic int veth_enable_range_safe(struct net_device *dev, int start, int end) 127262306a36Sopenharmony_ci{ 127362306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 127462306a36Sopenharmony_ci int err; 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_ci if (start >= end) 127762306a36Sopenharmony_ci return 0; 127862306a36Sopenharmony_ci 127962306a36Sopenharmony_ci if (priv->_xdp_prog) { 128062306a36Sopenharmony_ci /* these channels are freshly initialized, napi is not on there even 128162306a36Sopenharmony_ci * when GRO is requeste 128262306a36Sopenharmony_ci */ 128362306a36Sopenharmony_ci err = veth_enable_xdp_range(dev, start, end, false); 128462306a36Sopenharmony_ci if (err) 128562306a36Sopenharmony_ci return err; 128662306a36Sopenharmony_ci 128762306a36Sopenharmony_ci err = __veth_napi_enable_range(dev, start, end); 128862306a36Sopenharmony_ci if (err) { 128962306a36Sopenharmony_ci /* on error always delete the newly added napis */ 129062306a36Sopenharmony_ci veth_disable_xdp_range(dev, start, end, true); 129162306a36Sopenharmony_ci return err; 129262306a36Sopenharmony_ci } 129362306a36Sopenharmony_ci } else if (veth_gro_requested(dev)) { 129462306a36Sopenharmony_ci return veth_napi_enable_range(dev, start, end); 129562306a36Sopenharmony_ci } 129662306a36Sopenharmony_ci return 0; 129762306a36Sopenharmony_ci} 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_cistatic void veth_set_xdp_features(struct net_device *dev) 130062306a36Sopenharmony_ci{ 130162306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 130262306a36Sopenharmony_ci struct net_device *peer; 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci peer = rtnl_dereference(priv->peer); 130562306a36Sopenharmony_ci if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) { 130662306a36Sopenharmony_ci struct veth_priv *priv_peer = netdev_priv(peer); 130762306a36Sopenharmony_ci xdp_features_t val = NETDEV_XDP_ACT_BASIC | 130862306a36Sopenharmony_ci NETDEV_XDP_ACT_REDIRECT | 130962306a36Sopenharmony_ci NETDEV_XDP_ACT_RX_SG; 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci if (priv_peer->_xdp_prog || veth_gro_requested(peer)) 131262306a36Sopenharmony_ci val |= NETDEV_XDP_ACT_NDO_XMIT | 131362306a36Sopenharmony_ci NETDEV_XDP_ACT_NDO_XMIT_SG; 131462306a36Sopenharmony_ci xdp_set_features_flag(dev, val); 131562306a36Sopenharmony_ci } else { 131662306a36Sopenharmony_ci xdp_clear_features_flag(dev); 131762306a36Sopenharmony_ci } 131862306a36Sopenharmony_ci} 131962306a36Sopenharmony_ci 132062306a36Sopenharmony_cistatic int veth_set_channels(struct net_device *dev, 132162306a36Sopenharmony_ci struct ethtool_channels *ch) 132262306a36Sopenharmony_ci{ 132362306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 132462306a36Sopenharmony_ci unsigned int old_rx_count, new_rx_count; 132562306a36Sopenharmony_ci struct veth_priv *peer_priv; 132662306a36Sopenharmony_ci struct net_device *peer; 132762306a36Sopenharmony_ci int err; 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_ci /* sanity check. Upper bounds are already enforced by the caller */ 133062306a36Sopenharmony_ci if (!ch->rx_count || !ch->tx_count) 133162306a36Sopenharmony_ci return -EINVAL; 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci /* avoid braking XDP, if that is enabled */ 133462306a36Sopenharmony_ci peer = rtnl_dereference(priv->peer); 133562306a36Sopenharmony_ci peer_priv = peer ? netdev_priv(peer) : NULL; 133662306a36Sopenharmony_ci if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues) 133762306a36Sopenharmony_ci return -EINVAL; 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_ci if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues) 134062306a36Sopenharmony_ci return -EINVAL; 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci old_rx_count = dev->real_num_rx_queues; 134362306a36Sopenharmony_ci new_rx_count = ch->rx_count; 134462306a36Sopenharmony_ci if (netif_running(dev)) { 134562306a36Sopenharmony_ci /* turn device off */ 134662306a36Sopenharmony_ci netif_carrier_off(dev); 134762306a36Sopenharmony_ci if (peer) 134862306a36Sopenharmony_ci netif_carrier_off(peer); 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_ci /* try to allocate new resurces, as needed*/ 135162306a36Sopenharmony_ci err = veth_enable_range_safe(dev, old_rx_count, new_rx_count); 135262306a36Sopenharmony_ci if (err) 135362306a36Sopenharmony_ci goto out; 135462306a36Sopenharmony_ci } 135562306a36Sopenharmony_ci 135662306a36Sopenharmony_ci err = netif_set_real_num_rx_queues(dev, ch->rx_count); 135762306a36Sopenharmony_ci if (err) 135862306a36Sopenharmony_ci goto revert; 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_ci err = netif_set_real_num_tx_queues(dev, ch->tx_count); 136162306a36Sopenharmony_ci if (err) { 136262306a36Sopenharmony_ci int err2 = netif_set_real_num_rx_queues(dev, old_rx_count); 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_ci /* this error condition could happen only if rx and tx change 136562306a36Sopenharmony_ci * in opposite directions (e.g. tx nr raises, rx nr decreases) 136662306a36Sopenharmony_ci * and we can't do anything to fully restore the original 136762306a36Sopenharmony_ci * status 136862306a36Sopenharmony_ci */ 136962306a36Sopenharmony_ci if (err2) 137062306a36Sopenharmony_ci pr_warn("Can't restore rx queues config %d -> %d %d", 137162306a36Sopenharmony_ci new_rx_count, old_rx_count, err2); 137262306a36Sopenharmony_ci else 137362306a36Sopenharmony_ci goto revert; 137462306a36Sopenharmony_ci } 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ciout: 137762306a36Sopenharmony_ci if (netif_running(dev)) { 137862306a36Sopenharmony_ci /* note that we need to swap the arguments WRT the enable part 137962306a36Sopenharmony_ci * to identify the range we have to disable 138062306a36Sopenharmony_ci */ 138162306a36Sopenharmony_ci veth_disable_range_safe(dev, new_rx_count, old_rx_count); 138262306a36Sopenharmony_ci netif_carrier_on(dev); 138362306a36Sopenharmony_ci if (peer) 138462306a36Sopenharmony_ci netif_carrier_on(peer); 138562306a36Sopenharmony_ci } 138662306a36Sopenharmony_ci 138762306a36Sopenharmony_ci /* update XDP supported features */ 138862306a36Sopenharmony_ci veth_set_xdp_features(dev); 138962306a36Sopenharmony_ci if (peer) 139062306a36Sopenharmony_ci veth_set_xdp_features(peer); 139162306a36Sopenharmony_ci 139262306a36Sopenharmony_ci return err; 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_cirevert: 139562306a36Sopenharmony_ci new_rx_count = old_rx_count; 139662306a36Sopenharmony_ci old_rx_count = ch->rx_count; 139762306a36Sopenharmony_ci goto out; 139862306a36Sopenharmony_ci} 139962306a36Sopenharmony_ci 140062306a36Sopenharmony_cistatic int veth_open(struct net_device *dev) 140162306a36Sopenharmony_ci{ 140262306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 140362306a36Sopenharmony_ci struct net_device *peer = rtnl_dereference(priv->peer); 140462306a36Sopenharmony_ci int err; 140562306a36Sopenharmony_ci 140662306a36Sopenharmony_ci if (!peer) 140762306a36Sopenharmony_ci return -ENOTCONN; 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_ci if (priv->_xdp_prog) { 141062306a36Sopenharmony_ci err = veth_enable_xdp(dev); 141162306a36Sopenharmony_ci if (err) 141262306a36Sopenharmony_ci return err; 141362306a36Sopenharmony_ci } else if (veth_gro_requested(dev)) { 141462306a36Sopenharmony_ci err = veth_napi_enable(dev); 141562306a36Sopenharmony_ci if (err) 141662306a36Sopenharmony_ci return err; 141762306a36Sopenharmony_ci } 141862306a36Sopenharmony_ci 141962306a36Sopenharmony_ci if (peer->flags & IFF_UP) { 142062306a36Sopenharmony_ci netif_carrier_on(dev); 142162306a36Sopenharmony_ci netif_carrier_on(peer); 142262306a36Sopenharmony_ci } 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci veth_set_xdp_features(dev); 142562306a36Sopenharmony_ci 142662306a36Sopenharmony_ci return 0; 142762306a36Sopenharmony_ci} 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_cistatic int veth_close(struct net_device *dev) 143062306a36Sopenharmony_ci{ 143162306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 143262306a36Sopenharmony_ci struct net_device *peer = rtnl_dereference(priv->peer); 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_ci netif_carrier_off(dev); 143562306a36Sopenharmony_ci if (peer) 143662306a36Sopenharmony_ci netif_carrier_off(peer); 143762306a36Sopenharmony_ci 143862306a36Sopenharmony_ci if (priv->_xdp_prog) 143962306a36Sopenharmony_ci veth_disable_xdp(dev); 144062306a36Sopenharmony_ci else if (veth_gro_requested(dev)) 144162306a36Sopenharmony_ci veth_napi_del(dev); 144262306a36Sopenharmony_ci 144362306a36Sopenharmony_ci return 0; 144462306a36Sopenharmony_ci} 144562306a36Sopenharmony_ci 144662306a36Sopenharmony_cistatic int is_valid_veth_mtu(int mtu) 144762306a36Sopenharmony_ci{ 144862306a36Sopenharmony_ci return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 144962306a36Sopenharmony_ci} 145062306a36Sopenharmony_ci 145162306a36Sopenharmony_cistatic int veth_alloc_queues(struct net_device *dev) 145262306a36Sopenharmony_ci{ 145362306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 145462306a36Sopenharmony_ci int i; 145562306a36Sopenharmony_ci 145662306a36Sopenharmony_ci priv->rq = kvcalloc(dev->num_rx_queues, sizeof(*priv->rq), 145762306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); 145862306a36Sopenharmony_ci if (!priv->rq) 145962306a36Sopenharmony_ci return -ENOMEM; 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci for (i = 0; i < dev->num_rx_queues; i++) { 146262306a36Sopenharmony_ci priv->rq[i].dev = dev; 146362306a36Sopenharmony_ci u64_stats_init(&priv->rq[i].stats.syncp); 146462306a36Sopenharmony_ci } 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci return 0; 146762306a36Sopenharmony_ci} 146862306a36Sopenharmony_ci 146962306a36Sopenharmony_cistatic void veth_free_queues(struct net_device *dev) 147062306a36Sopenharmony_ci{ 147162306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 147262306a36Sopenharmony_ci 147362306a36Sopenharmony_ci kvfree(priv->rq); 147462306a36Sopenharmony_ci} 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_cistatic int veth_dev_init(struct net_device *dev) 147762306a36Sopenharmony_ci{ 147862306a36Sopenharmony_ci return veth_alloc_queues(dev); 147962306a36Sopenharmony_ci} 148062306a36Sopenharmony_ci 148162306a36Sopenharmony_cistatic void veth_dev_free(struct net_device *dev) 148262306a36Sopenharmony_ci{ 148362306a36Sopenharmony_ci veth_free_queues(dev); 148462306a36Sopenharmony_ci} 148562306a36Sopenharmony_ci 148662306a36Sopenharmony_ci#ifdef CONFIG_NET_POLL_CONTROLLER 148762306a36Sopenharmony_cistatic void veth_poll_controller(struct net_device *dev) 148862306a36Sopenharmony_ci{ 148962306a36Sopenharmony_ci /* veth only receives frames when its peer sends one 149062306a36Sopenharmony_ci * Since it has nothing to do with disabling irqs, we are guaranteed 149162306a36Sopenharmony_ci * never to have pending data when we poll for it so 149262306a36Sopenharmony_ci * there is nothing to do here. 149362306a36Sopenharmony_ci * 149462306a36Sopenharmony_ci * We need this though so netpoll recognizes us as an interface that 149562306a36Sopenharmony_ci * supports polling, which enables bridge devices in virt setups to 149662306a36Sopenharmony_ci * still use netconsole 149762306a36Sopenharmony_ci */ 149862306a36Sopenharmony_ci} 149962306a36Sopenharmony_ci#endif /* CONFIG_NET_POLL_CONTROLLER */ 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_cistatic int veth_get_iflink(const struct net_device *dev) 150262306a36Sopenharmony_ci{ 150362306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 150462306a36Sopenharmony_ci struct net_device *peer; 150562306a36Sopenharmony_ci int iflink; 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_ci rcu_read_lock(); 150862306a36Sopenharmony_ci peer = rcu_dereference(priv->peer); 150962306a36Sopenharmony_ci iflink = peer ? peer->ifindex : 0; 151062306a36Sopenharmony_ci rcu_read_unlock(); 151162306a36Sopenharmony_ci 151262306a36Sopenharmony_ci return iflink; 151362306a36Sopenharmony_ci} 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_cistatic netdev_features_t veth_fix_features(struct net_device *dev, 151662306a36Sopenharmony_ci netdev_features_t features) 151762306a36Sopenharmony_ci{ 151862306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 151962306a36Sopenharmony_ci struct net_device *peer; 152062306a36Sopenharmony_ci 152162306a36Sopenharmony_ci peer = rtnl_dereference(priv->peer); 152262306a36Sopenharmony_ci if (peer) { 152362306a36Sopenharmony_ci struct veth_priv *peer_priv = netdev_priv(peer); 152462306a36Sopenharmony_ci 152562306a36Sopenharmony_ci if (peer_priv->_xdp_prog) 152662306a36Sopenharmony_ci features &= ~NETIF_F_GSO_SOFTWARE; 152762306a36Sopenharmony_ci } 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci return features; 153062306a36Sopenharmony_ci} 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_cistatic int veth_set_features(struct net_device *dev, 153362306a36Sopenharmony_ci netdev_features_t features) 153462306a36Sopenharmony_ci{ 153562306a36Sopenharmony_ci netdev_features_t changed = features ^ dev->features; 153662306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 153762306a36Sopenharmony_ci struct net_device *peer; 153862306a36Sopenharmony_ci int err; 153962306a36Sopenharmony_ci 154062306a36Sopenharmony_ci if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog) 154162306a36Sopenharmony_ci return 0; 154262306a36Sopenharmony_ci 154362306a36Sopenharmony_ci peer = rtnl_dereference(priv->peer); 154462306a36Sopenharmony_ci if (features & NETIF_F_GRO) { 154562306a36Sopenharmony_ci err = veth_napi_enable(dev); 154662306a36Sopenharmony_ci if (err) 154762306a36Sopenharmony_ci return err; 154862306a36Sopenharmony_ci 154962306a36Sopenharmony_ci if (peer) 155062306a36Sopenharmony_ci xdp_features_set_redirect_target(peer, true); 155162306a36Sopenharmony_ci } else { 155262306a36Sopenharmony_ci if (peer) 155362306a36Sopenharmony_ci xdp_features_clear_redirect_target(peer); 155462306a36Sopenharmony_ci veth_napi_del(dev); 155562306a36Sopenharmony_ci } 155662306a36Sopenharmony_ci return 0; 155762306a36Sopenharmony_ci} 155862306a36Sopenharmony_ci 155962306a36Sopenharmony_cistatic void veth_set_rx_headroom(struct net_device *dev, int new_hr) 156062306a36Sopenharmony_ci{ 156162306a36Sopenharmony_ci struct veth_priv *peer_priv, *priv = netdev_priv(dev); 156262306a36Sopenharmony_ci struct net_device *peer; 156362306a36Sopenharmony_ci 156462306a36Sopenharmony_ci if (new_hr < 0) 156562306a36Sopenharmony_ci new_hr = 0; 156662306a36Sopenharmony_ci 156762306a36Sopenharmony_ci rcu_read_lock(); 156862306a36Sopenharmony_ci peer = rcu_dereference(priv->peer); 156962306a36Sopenharmony_ci if (unlikely(!peer)) 157062306a36Sopenharmony_ci goto out; 157162306a36Sopenharmony_ci 157262306a36Sopenharmony_ci peer_priv = netdev_priv(peer); 157362306a36Sopenharmony_ci priv->requested_headroom = new_hr; 157462306a36Sopenharmony_ci new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 157562306a36Sopenharmony_ci dev->needed_headroom = new_hr; 157662306a36Sopenharmony_ci peer->needed_headroom = new_hr; 157762306a36Sopenharmony_ci 157862306a36Sopenharmony_ciout: 157962306a36Sopenharmony_ci rcu_read_unlock(); 158062306a36Sopenharmony_ci} 158162306a36Sopenharmony_ci 158262306a36Sopenharmony_cistatic int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, 158362306a36Sopenharmony_ci struct netlink_ext_ack *extack) 158462306a36Sopenharmony_ci{ 158562306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 158662306a36Sopenharmony_ci struct bpf_prog *old_prog; 158762306a36Sopenharmony_ci struct net_device *peer; 158862306a36Sopenharmony_ci unsigned int max_mtu; 158962306a36Sopenharmony_ci int err; 159062306a36Sopenharmony_ci 159162306a36Sopenharmony_ci old_prog = priv->_xdp_prog; 159262306a36Sopenharmony_ci priv->_xdp_prog = prog; 159362306a36Sopenharmony_ci peer = rtnl_dereference(priv->peer); 159462306a36Sopenharmony_ci 159562306a36Sopenharmony_ci if (prog) { 159662306a36Sopenharmony_ci if (!peer) { 159762306a36Sopenharmony_ci NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached"); 159862306a36Sopenharmony_ci err = -ENOTCONN; 159962306a36Sopenharmony_ci goto err; 160062306a36Sopenharmony_ci } 160162306a36Sopenharmony_ci 160262306a36Sopenharmony_ci max_mtu = SKB_WITH_OVERHEAD(PAGE_SIZE - VETH_XDP_HEADROOM) - 160362306a36Sopenharmony_ci peer->hard_header_len; 160462306a36Sopenharmony_ci /* Allow increasing the max_mtu if the program supports 160562306a36Sopenharmony_ci * XDP fragments. 160662306a36Sopenharmony_ci */ 160762306a36Sopenharmony_ci if (prog->aux->xdp_has_frags) 160862306a36Sopenharmony_ci max_mtu += PAGE_SIZE * MAX_SKB_FRAGS; 160962306a36Sopenharmony_ci 161062306a36Sopenharmony_ci if (peer->mtu > max_mtu) { 161162306a36Sopenharmony_ci NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP"); 161262306a36Sopenharmony_ci err = -ERANGE; 161362306a36Sopenharmony_ci goto err; 161462306a36Sopenharmony_ci } 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci if (dev->real_num_rx_queues < peer->real_num_tx_queues) { 161762306a36Sopenharmony_ci NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues"); 161862306a36Sopenharmony_ci err = -ENOSPC; 161962306a36Sopenharmony_ci goto err; 162062306a36Sopenharmony_ci } 162162306a36Sopenharmony_ci 162262306a36Sopenharmony_ci if (dev->flags & IFF_UP) { 162362306a36Sopenharmony_ci err = veth_enable_xdp(dev); 162462306a36Sopenharmony_ci if (err) { 162562306a36Sopenharmony_ci NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed"); 162662306a36Sopenharmony_ci goto err; 162762306a36Sopenharmony_ci } 162862306a36Sopenharmony_ci } 162962306a36Sopenharmony_ci 163062306a36Sopenharmony_ci if (!old_prog) { 163162306a36Sopenharmony_ci peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; 163262306a36Sopenharmony_ci peer->max_mtu = max_mtu; 163362306a36Sopenharmony_ci } 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_ci xdp_features_set_redirect_target(peer, true); 163662306a36Sopenharmony_ci } 163762306a36Sopenharmony_ci 163862306a36Sopenharmony_ci if (old_prog) { 163962306a36Sopenharmony_ci if (!prog) { 164062306a36Sopenharmony_ci if (peer && !veth_gro_requested(dev)) 164162306a36Sopenharmony_ci xdp_features_clear_redirect_target(peer); 164262306a36Sopenharmony_ci 164362306a36Sopenharmony_ci if (dev->flags & IFF_UP) 164462306a36Sopenharmony_ci veth_disable_xdp(dev); 164562306a36Sopenharmony_ci 164662306a36Sopenharmony_ci if (peer) { 164762306a36Sopenharmony_ci peer->hw_features |= NETIF_F_GSO_SOFTWARE; 164862306a36Sopenharmony_ci peer->max_mtu = ETH_MAX_MTU; 164962306a36Sopenharmony_ci } 165062306a36Sopenharmony_ci } 165162306a36Sopenharmony_ci bpf_prog_put(old_prog); 165262306a36Sopenharmony_ci } 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci if ((!!old_prog ^ !!prog) && peer) 165562306a36Sopenharmony_ci netdev_update_features(peer); 165662306a36Sopenharmony_ci 165762306a36Sopenharmony_ci return 0; 165862306a36Sopenharmony_cierr: 165962306a36Sopenharmony_ci priv->_xdp_prog = old_prog; 166062306a36Sopenharmony_ci 166162306a36Sopenharmony_ci return err; 166262306a36Sopenharmony_ci} 166362306a36Sopenharmony_ci 166462306a36Sopenharmony_cistatic int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) 166562306a36Sopenharmony_ci{ 166662306a36Sopenharmony_ci switch (xdp->command) { 166762306a36Sopenharmony_ci case XDP_SETUP_PROG: 166862306a36Sopenharmony_ci return veth_xdp_set(dev, xdp->prog, xdp->extack); 166962306a36Sopenharmony_ci default: 167062306a36Sopenharmony_ci return -EINVAL; 167162306a36Sopenharmony_ci } 167262306a36Sopenharmony_ci} 167362306a36Sopenharmony_ci 167462306a36Sopenharmony_cistatic int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) 167562306a36Sopenharmony_ci{ 167662306a36Sopenharmony_ci struct veth_xdp_buff *_ctx = (void *)ctx; 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_ci if (!_ctx->skb) 167962306a36Sopenharmony_ci return -ENODATA; 168062306a36Sopenharmony_ci 168162306a36Sopenharmony_ci *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp; 168262306a36Sopenharmony_ci return 0; 168362306a36Sopenharmony_ci} 168462306a36Sopenharmony_ci 168562306a36Sopenharmony_cistatic int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, 168662306a36Sopenharmony_ci enum xdp_rss_hash_type *rss_type) 168762306a36Sopenharmony_ci{ 168862306a36Sopenharmony_ci struct veth_xdp_buff *_ctx = (void *)ctx; 168962306a36Sopenharmony_ci struct sk_buff *skb = _ctx->skb; 169062306a36Sopenharmony_ci 169162306a36Sopenharmony_ci if (!skb) 169262306a36Sopenharmony_ci return -ENODATA; 169362306a36Sopenharmony_ci 169462306a36Sopenharmony_ci *hash = skb_get_hash(skb); 169562306a36Sopenharmony_ci *rss_type = skb->l4_hash ? XDP_RSS_TYPE_L4_ANY : XDP_RSS_TYPE_NONE; 169662306a36Sopenharmony_ci 169762306a36Sopenharmony_ci return 0; 169862306a36Sopenharmony_ci} 169962306a36Sopenharmony_ci 170062306a36Sopenharmony_cistatic const struct net_device_ops veth_netdev_ops = { 170162306a36Sopenharmony_ci .ndo_init = veth_dev_init, 170262306a36Sopenharmony_ci .ndo_open = veth_open, 170362306a36Sopenharmony_ci .ndo_stop = veth_close, 170462306a36Sopenharmony_ci .ndo_start_xmit = veth_xmit, 170562306a36Sopenharmony_ci .ndo_get_stats64 = veth_get_stats64, 170662306a36Sopenharmony_ci .ndo_set_rx_mode = veth_set_multicast_list, 170762306a36Sopenharmony_ci .ndo_set_mac_address = eth_mac_addr, 170862306a36Sopenharmony_ci#ifdef CONFIG_NET_POLL_CONTROLLER 170962306a36Sopenharmony_ci .ndo_poll_controller = veth_poll_controller, 171062306a36Sopenharmony_ci#endif 171162306a36Sopenharmony_ci .ndo_get_iflink = veth_get_iflink, 171262306a36Sopenharmony_ci .ndo_fix_features = veth_fix_features, 171362306a36Sopenharmony_ci .ndo_set_features = veth_set_features, 171462306a36Sopenharmony_ci .ndo_features_check = passthru_features_check, 171562306a36Sopenharmony_ci .ndo_set_rx_headroom = veth_set_rx_headroom, 171662306a36Sopenharmony_ci .ndo_bpf = veth_xdp, 171762306a36Sopenharmony_ci .ndo_xdp_xmit = veth_ndo_xdp_xmit, 171862306a36Sopenharmony_ci .ndo_get_peer_dev = veth_peer_dev, 171962306a36Sopenharmony_ci}; 172062306a36Sopenharmony_ci 172162306a36Sopenharmony_cistatic const struct xdp_metadata_ops veth_xdp_metadata_ops = { 172262306a36Sopenharmony_ci .xmo_rx_timestamp = veth_xdp_rx_timestamp, 172362306a36Sopenharmony_ci .xmo_rx_hash = veth_xdp_rx_hash, 172462306a36Sopenharmony_ci}; 172562306a36Sopenharmony_ci 172662306a36Sopenharmony_ci#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 172762306a36Sopenharmony_ci NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 172862306a36Sopenharmony_ci NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 172962306a36Sopenharmony_ci NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 173062306a36Sopenharmony_ci NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 173162306a36Sopenharmony_ci 173262306a36Sopenharmony_cistatic void veth_setup(struct net_device *dev) 173362306a36Sopenharmony_ci{ 173462306a36Sopenharmony_ci ether_setup(dev); 173562306a36Sopenharmony_ci 173662306a36Sopenharmony_ci dev->priv_flags &= ~IFF_TX_SKB_SHARING; 173762306a36Sopenharmony_ci dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 173862306a36Sopenharmony_ci dev->priv_flags |= IFF_NO_QUEUE; 173962306a36Sopenharmony_ci dev->priv_flags |= IFF_PHONY_HEADROOM; 174062306a36Sopenharmony_ci 174162306a36Sopenharmony_ci dev->netdev_ops = &veth_netdev_ops; 174262306a36Sopenharmony_ci dev->xdp_metadata_ops = &veth_xdp_metadata_ops; 174362306a36Sopenharmony_ci dev->ethtool_ops = &veth_ethtool_ops; 174462306a36Sopenharmony_ci dev->features |= NETIF_F_LLTX; 174562306a36Sopenharmony_ci dev->features |= VETH_FEATURES; 174662306a36Sopenharmony_ci dev->vlan_features = dev->features & 174762306a36Sopenharmony_ci ~(NETIF_F_HW_VLAN_CTAG_TX | 174862306a36Sopenharmony_ci NETIF_F_HW_VLAN_STAG_TX | 174962306a36Sopenharmony_ci NETIF_F_HW_VLAN_CTAG_RX | 175062306a36Sopenharmony_ci NETIF_F_HW_VLAN_STAG_RX); 175162306a36Sopenharmony_ci dev->needs_free_netdev = true; 175262306a36Sopenharmony_ci dev->priv_destructor = veth_dev_free; 175362306a36Sopenharmony_ci dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; 175462306a36Sopenharmony_ci dev->max_mtu = ETH_MAX_MTU; 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci dev->hw_features = VETH_FEATURES; 175762306a36Sopenharmony_ci dev->hw_enc_features = VETH_FEATURES; 175862306a36Sopenharmony_ci dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 175962306a36Sopenharmony_ci netif_set_tso_max_size(dev, GSO_MAX_SIZE); 176062306a36Sopenharmony_ci} 176162306a36Sopenharmony_ci 176262306a36Sopenharmony_ci/* 176362306a36Sopenharmony_ci * netlink interface 176462306a36Sopenharmony_ci */ 176562306a36Sopenharmony_ci 176662306a36Sopenharmony_cistatic int veth_validate(struct nlattr *tb[], struct nlattr *data[], 176762306a36Sopenharmony_ci struct netlink_ext_ack *extack) 176862306a36Sopenharmony_ci{ 176962306a36Sopenharmony_ci if (tb[IFLA_ADDRESS]) { 177062306a36Sopenharmony_ci if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 177162306a36Sopenharmony_ci return -EINVAL; 177262306a36Sopenharmony_ci if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 177362306a36Sopenharmony_ci return -EADDRNOTAVAIL; 177462306a36Sopenharmony_ci } 177562306a36Sopenharmony_ci if (tb[IFLA_MTU]) { 177662306a36Sopenharmony_ci if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 177762306a36Sopenharmony_ci return -EINVAL; 177862306a36Sopenharmony_ci } 177962306a36Sopenharmony_ci return 0; 178062306a36Sopenharmony_ci} 178162306a36Sopenharmony_ci 178262306a36Sopenharmony_cistatic struct rtnl_link_ops veth_link_ops; 178362306a36Sopenharmony_ci 178462306a36Sopenharmony_cistatic void veth_disable_gro(struct net_device *dev) 178562306a36Sopenharmony_ci{ 178662306a36Sopenharmony_ci dev->features &= ~NETIF_F_GRO; 178762306a36Sopenharmony_ci dev->wanted_features &= ~NETIF_F_GRO; 178862306a36Sopenharmony_ci netdev_update_features(dev); 178962306a36Sopenharmony_ci} 179062306a36Sopenharmony_ci 179162306a36Sopenharmony_cistatic int veth_init_queues(struct net_device *dev, struct nlattr *tb[]) 179262306a36Sopenharmony_ci{ 179362306a36Sopenharmony_ci int err; 179462306a36Sopenharmony_ci 179562306a36Sopenharmony_ci if (!tb[IFLA_NUM_TX_QUEUES] && dev->num_tx_queues > 1) { 179662306a36Sopenharmony_ci err = netif_set_real_num_tx_queues(dev, 1); 179762306a36Sopenharmony_ci if (err) 179862306a36Sopenharmony_ci return err; 179962306a36Sopenharmony_ci } 180062306a36Sopenharmony_ci if (!tb[IFLA_NUM_RX_QUEUES] && dev->num_rx_queues > 1) { 180162306a36Sopenharmony_ci err = netif_set_real_num_rx_queues(dev, 1); 180262306a36Sopenharmony_ci if (err) 180362306a36Sopenharmony_ci return err; 180462306a36Sopenharmony_ci } 180562306a36Sopenharmony_ci return 0; 180662306a36Sopenharmony_ci} 180762306a36Sopenharmony_ci 180862306a36Sopenharmony_cistatic int veth_newlink(struct net *src_net, struct net_device *dev, 180962306a36Sopenharmony_ci struct nlattr *tb[], struct nlattr *data[], 181062306a36Sopenharmony_ci struct netlink_ext_ack *extack) 181162306a36Sopenharmony_ci{ 181262306a36Sopenharmony_ci int err; 181362306a36Sopenharmony_ci struct net_device *peer; 181462306a36Sopenharmony_ci struct veth_priv *priv; 181562306a36Sopenharmony_ci char ifname[IFNAMSIZ]; 181662306a36Sopenharmony_ci struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 181762306a36Sopenharmony_ci unsigned char name_assign_type; 181862306a36Sopenharmony_ci struct ifinfomsg *ifmp; 181962306a36Sopenharmony_ci struct net *net; 182062306a36Sopenharmony_ci 182162306a36Sopenharmony_ci /* 182262306a36Sopenharmony_ci * create and register peer first 182362306a36Sopenharmony_ci */ 182462306a36Sopenharmony_ci if (data != NULL && data[VETH_INFO_PEER] != NULL) { 182562306a36Sopenharmony_ci struct nlattr *nla_peer; 182662306a36Sopenharmony_ci 182762306a36Sopenharmony_ci nla_peer = data[VETH_INFO_PEER]; 182862306a36Sopenharmony_ci ifmp = nla_data(nla_peer); 182962306a36Sopenharmony_ci err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack); 183062306a36Sopenharmony_ci if (err < 0) 183162306a36Sopenharmony_ci return err; 183262306a36Sopenharmony_ci 183362306a36Sopenharmony_ci err = veth_validate(peer_tb, NULL, extack); 183462306a36Sopenharmony_ci if (err < 0) 183562306a36Sopenharmony_ci return err; 183662306a36Sopenharmony_ci 183762306a36Sopenharmony_ci tbp = peer_tb; 183862306a36Sopenharmony_ci } else { 183962306a36Sopenharmony_ci ifmp = NULL; 184062306a36Sopenharmony_ci tbp = tb; 184162306a36Sopenharmony_ci } 184262306a36Sopenharmony_ci 184362306a36Sopenharmony_ci if (ifmp && tbp[IFLA_IFNAME]) { 184462306a36Sopenharmony_ci nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 184562306a36Sopenharmony_ci name_assign_type = NET_NAME_USER; 184662306a36Sopenharmony_ci } else { 184762306a36Sopenharmony_ci snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 184862306a36Sopenharmony_ci name_assign_type = NET_NAME_ENUM; 184962306a36Sopenharmony_ci } 185062306a36Sopenharmony_ci 185162306a36Sopenharmony_ci net = rtnl_link_get_net(src_net, tbp); 185262306a36Sopenharmony_ci if (IS_ERR(net)) 185362306a36Sopenharmony_ci return PTR_ERR(net); 185462306a36Sopenharmony_ci 185562306a36Sopenharmony_ci peer = rtnl_create_link(net, ifname, name_assign_type, 185662306a36Sopenharmony_ci &veth_link_ops, tbp, extack); 185762306a36Sopenharmony_ci if (IS_ERR(peer)) { 185862306a36Sopenharmony_ci put_net(net); 185962306a36Sopenharmony_ci return PTR_ERR(peer); 186062306a36Sopenharmony_ci } 186162306a36Sopenharmony_ci 186262306a36Sopenharmony_ci if (!ifmp || !tbp[IFLA_ADDRESS]) 186362306a36Sopenharmony_ci eth_hw_addr_random(peer); 186462306a36Sopenharmony_ci 186562306a36Sopenharmony_ci if (ifmp && (dev->ifindex != 0)) 186662306a36Sopenharmony_ci peer->ifindex = ifmp->ifi_index; 186762306a36Sopenharmony_ci 186862306a36Sopenharmony_ci netif_inherit_tso_max(peer, dev); 186962306a36Sopenharmony_ci 187062306a36Sopenharmony_ci err = register_netdevice(peer); 187162306a36Sopenharmony_ci put_net(net); 187262306a36Sopenharmony_ci net = NULL; 187362306a36Sopenharmony_ci if (err < 0) 187462306a36Sopenharmony_ci goto err_register_peer; 187562306a36Sopenharmony_ci 187662306a36Sopenharmony_ci /* keep GRO disabled by default to be consistent with the established 187762306a36Sopenharmony_ci * veth behavior 187862306a36Sopenharmony_ci */ 187962306a36Sopenharmony_ci veth_disable_gro(peer); 188062306a36Sopenharmony_ci netif_carrier_off(peer); 188162306a36Sopenharmony_ci 188262306a36Sopenharmony_ci err = rtnl_configure_link(peer, ifmp, 0, NULL); 188362306a36Sopenharmony_ci if (err < 0) 188462306a36Sopenharmony_ci goto err_configure_peer; 188562306a36Sopenharmony_ci 188662306a36Sopenharmony_ci /* 188762306a36Sopenharmony_ci * register dev last 188862306a36Sopenharmony_ci * 188962306a36Sopenharmony_ci * note, that since we've registered new device the dev's name 189062306a36Sopenharmony_ci * should be re-allocated 189162306a36Sopenharmony_ci */ 189262306a36Sopenharmony_ci 189362306a36Sopenharmony_ci if (tb[IFLA_ADDRESS] == NULL) 189462306a36Sopenharmony_ci eth_hw_addr_random(dev); 189562306a36Sopenharmony_ci 189662306a36Sopenharmony_ci if (tb[IFLA_IFNAME]) 189762306a36Sopenharmony_ci nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 189862306a36Sopenharmony_ci else 189962306a36Sopenharmony_ci snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 190062306a36Sopenharmony_ci 190162306a36Sopenharmony_ci err = register_netdevice(dev); 190262306a36Sopenharmony_ci if (err < 0) 190362306a36Sopenharmony_ci goto err_register_dev; 190462306a36Sopenharmony_ci 190562306a36Sopenharmony_ci netif_carrier_off(dev); 190662306a36Sopenharmony_ci 190762306a36Sopenharmony_ci /* 190862306a36Sopenharmony_ci * tie the deviced together 190962306a36Sopenharmony_ci */ 191062306a36Sopenharmony_ci 191162306a36Sopenharmony_ci priv = netdev_priv(dev); 191262306a36Sopenharmony_ci rcu_assign_pointer(priv->peer, peer); 191362306a36Sopenharmony_ci err = veth_init_queues(dev, tb); 191462306a36Sopenharmony_ci if (err) 191562306a36Sopenharmony_ci goto err_queues; 191662306a36Sopenharmony_ci 191762306a36Sopenharmony_ci priv = netdev_priv(peer); 191862306a36Sopenharmony_ci rcu_assign_pointer(priv->peer, dev); 191962306a36Sopenharmony_ci err = veth_init_queues(peer, tb); 192062306a36Sopenharmony_ci if (err) 192162306a36Sopenharmony_ci goto err_queues; 192262306a36Sopenharmony_ci 192362306a36Sopenharmony_ci veth_disable_gro(dev); 192462306a36Sopenharmony_ci /* update XDP supported features */ 192562306a36Sopenharmony_ci veth_set_xdp_features(dev); 192662306a36Sopenharmony_ci veth_set_xdp_features(peer); 192762306a36Sopenharmony_ci 192862306a36Sopenharmony_ci return 0; 192962306a36Sopenharmony_ci 193062306a36Sopenharmony_cierr_queues: 193162306a36Sopenharmony_ci unregister_netdevice(dev); 193262306a36Sopenharmony_cierr_register_dev: 193362306a36Sopenharmony_ci /* nothing to do */ 193462306a36Sopenharmony_cierr_configure_peer: 193562306a36Sopenharmony_ci unregister_netdevice(peer); 193662306a36Sopenharmony_ci return err; 193762306a36Sopenharmony_ci 193862306a36Sopenharmony_cierr_register_peer: 193962306a36Sopenharmony_ci free_netdev(peer); 194062306a36Sopenharmony_ci return err; 194162306a36Sopenharmony_ci} 194262306a36Sopenharmony_ci 194362306a36Sopenharmony_cistatic void veth_dellink(struct net_device *dev, struct list_head *head) 194462306a36Sopenharmony_ci{ 194562306a36Sopenharmony_ci struct veth_priv *priv; 194662306a36Sopenharmony_ci struct net_device *peer; 194762306a36Sopenharmony_ci 194862306a36Sopenharmony_ci priv = netdev_priv(dev); 194962306a36Sopenharmony_ci peer = rtnl_dereference(priv->peer); 195062306a36Sopenharmony_ci 195162306a36Sopenharmony_ci /* Note : dellink() is called from default_device_exit_batch(), 195262306a36Sopenharmony_ci * before a rcu_synchronize() point. The devices are guaranteed 195362306a36Sopenharmony_ci * not being freed before one RCU grace period. 195462306a36Sopenharmony_ci */ 195562306a36Sopenharmony_ci RCU_INIT_POINTER(priv->peer, NULL); 195662306a36Sopenharmony_ci unregister_netdevice_queue(dev, head); 195762306a36Sopenharmony_ci 195862306a36Sopenharmony_ci if (peer) { 195962306a36Sopenharmony_ci priv = netdev_priv(peer); 196062306a36Sopenharmony_ci RCU_INIT_POINTER(priv->peer, NULL); 196162306a36Sopenharmony_ci unregister_netdevice_queue(peer, head); 196262306a36Sopenharmony_ci } 196362306a36Sopenharmony_ci} 196462306a36Sopenharmony_ci 196562306a36Sopenharmony_cistatic const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 196662306a36Sopenharmony_ci [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 196762306a36Sopenharmony_ci}; 196862306a36Sopenharmony_ci 196962306a36Sopenharmony_cistatic struct net *veth_get_link_net(const struct net_device *dev) 197062306a36Sopenharmony_ci{ 197162306a36Sopenharmony_ci struct veth_priv *priv = netdev_priv(dev); 197262306a36Sopenharmony_ci struct net_device *peer = rtnl_dereference(priv->peer); 197362306a36Sopenharmony_ci 197462306a36Sopenharmony_ci return peer ? dev_net(peer) : dev_net(dev); 197562306a36Sopenharmony_ci} 197662306a36Sopenharmony_ci 197762306a36Sopenharmony_cistatic unsigned int veth_get_num_queues(void) 197862306a36Sopenharmony_ci{ 197962306a36Sopenharmony_ci /* enforce the same queue limit as rtnl_create_link */ 198062306a36Sopenharmony_ci int queues = num_possible_cpus(); 198162306a36Sopenharmony_ci 198262306a36Sopenharmony_ci if (queues > 4096) 198362306a36Sopenharmony_ci queues = 4096; 198462306a36Sopenharmony_ci return queues; 198562306a36Sopenharmony_ci} 198662306a36Sopenharmony_ci 198762306a36Sopenharmony_cistatic struct rtnl_link_ops veth_link_ops = { 198862306a36Sopenharmony_ci .kind = DRV_NAME, 198962306a36Sopenharmony_ci .priv_size = sizeof(struct veth_priv), 199062306a36Sopenharmony_ci .setup = veth_setup, 199162306a36Sopenharmony_ci .validate = veth_validate, 199262306a36Sopenharmony_ci .newlink = veth_newlink, 199362306a36Sopenharmony_ci .dellink = veth_dellink, 199462306a36Sopenharmony_ci .policy = veth_policy, 199562306a36Sopenharmony_ci .maxtype = VETH_INFO_MAX, 199662306a36Sopenharmony_ci .get_link_net = veth_get_link_net, 199762306a36Sopenharmony_ci .get_num_tx_queues = veth_get_num_queues, 199862306a36Sopenharmony_ci .get_num_rx_queues = veth_get_num_queues, 199962306a36Sopenharmony_ci}; 200062306a36Sopenharmony_ci 200162306a36Sopenharmony_ci/* 200262306a36Sopenharmony_ci * init/fini 200362306a36Sopenharmony_ci */ 200462306a36Sopenharmony_ci 200562306a36Sopenharmony_cistatic __init int veth_init(void) 200662306a36Sopenharmony_ci{ 200762306a36Sopenharmony_ci return rtnl_link_register(&veth_link_ops); 200862306a36Sopenharmony_ci} 200962306a36Sopenharmony_ci 201062306a36Sopenharmony_cistatic __exit void veth_exit(void) 201162306a36Sopenharmony_ci{ 201262306a36Sopenharmony_ci rtnl_link_unregister(&veth_link_ops); 201362306a36Sopenharmony_ci} 201462306a36Sopenharmony_ci 201562306a36Sopenharmony_cimodule_init(veth_init); 201662306a36Sopenharmony_cimodule_exit(veth_exit); 201762306a36Sopenharmony_ci 201862306a36Sopenharmony_ciMODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 201962306a36Sopenharmony_ciMODULE_LICENSE("GPL v2"); 202062306a36Sopenharmony_ciMODULE_ALIAS_RTNL_LINK(DRV_NAME); 2021