162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci#include <linux/etherdevice.h> 362306a36Sopenharmony_ci#include <linux/if_tap.h> 462306a36Sopenharmony_ci#include <linux/if_vlan.h> 562306a36Sopenharmony_ci#include <linux/interrupt.h> 662306a36Sopenharmony_ci#include <linux/nsproxy.h> 762306a36Sopenharmony_ci#include <linux/compat.h> 862306a36Sopenharmony_ci#include <linux/if_tun.h> 962306a36Sopenharmony_ci#include <linux/module.h> 1062306a36Sopenharmony_ci#include <linux/skbuff.h> 1162306a36Sopenharmony_ci#include <linux/cache.h> 1262306a36Sopenharmony_ci#include <linux/sched/signal.h> 1362306a36Sopenharmony_ci#include <linux/types.h> 1462306a36Sopenharmony_ci#include <linux/slab.h> 1562306a36Sopenharmony_ci#include <linux/wait.h> 1662306a36Sopenharmony_ci#include <linux/cdev.h> 1762306a36Sopenharmony_ci#include <linux/idr.h> 1862306a36Sopenharmony_ci#include <linux/fs.h> 1962306a36Sopenharmony_ci#include <linux/uio.h> 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci#include <net/gso.h> 2262306a36Sopenharmony_ci#include <net/net_namespace.h> 2362306a36Sopenharmony_ci#include <net/rtnetlink.h> 2462306a36Sopenharmony_ci#include <net/sock.h> 2562306a36Sopenharmony_ci#include <net/xdp.h> 2662306a36Sopenharmony_ci#include <linux/virtio_net.h> 2762306a36Sopenharmony_ci#include <linux/skb_array.h> 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#define TAP_VNET_LE 0x80000000 3262306a36Sopenharmony_ci#define TAP_VNET_BE 0x40000000 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci#ifdef CONFIG_TUN_VNET_CROSS_LE 3562306a36Sopenharmony_cistatic inline bool tap_legacy_is_little_endian(struct tap_queue *q) 3662306a36Sopenharmony_ci{ 3762306a36Sopenharmony_ci return q->flags & TAP_VNET_BE ? false : 3862306a36Sopenharmony_ci virtio_legacy_is_little_endian(); 3962306a36Sopenharmony_ci} 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistatic long tap_get_vnet_be(struct tap_queue *q, int __user *sp) 4262306a36Sopenharmony_ci{ 4362306a36Sopenharmony_ci int s = !!(q->flags & TAP_VNET_BE); 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci if (put_user(s, sp)) 4662306a36Sopenharmony_ci return -EFAULT; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci return 0; 4962306a36Sopenharmony_ci} 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_cistatic long tap_set_vnet_be(struct tap_queue *q, int __user *sp) 5262306a36Sopenharmony_ci{ 5362306a36Sopenharmony_ci int s; 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci if (get_user(s, sp)) 5662306a36Sopenharmony_ci return -EFAULT; 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci if (s) 5962306a36Sopenharmony_ci q->flags |= TAP_VNET_BE; 6062306a36Sopenharmony_ci else 6162306a36Sopenharmony_ci q->flags &= ~TAP_VNET_BE; 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci return 0; 6462306a36Sopenharmony_ci} 6562306a36Sopenharmony_ci#else 6662306a36Sopenharmony_cistatic inline bool tap_legacy_is_little_endian(struct tap_queue *q) 6762306a36Sopenharmony_ci{ 6862306a36Sopenharmony_ci return virtio_legacy_is_little_endian(); 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_cistatic long tap_get_vnet_be(struct tap_queue *q, int __user *argp) 7262306a36Sopenharmony_ci{ 7362306a36Sopenharmony_ci return -EINVAL; 7462306a36Sopenharmony_ci} 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_cistatic long tap_set_vnet_be(struct tap_queue *q, int __user *argp) 7762306a36Sopenharmony_ci{ 7862306a36Sopenharmony_ci return -EINVAL; 7962306a36Sopenharmony_ci} 8062306a36Sopenharmony_ci#endif /* CONFIG_TUN_VNET_CROSS_LE */ 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_cistatic inline bool tap_is_little_endian(struct tap_queue *q) 8362306a36Sopenharmony_ci{ 8462306a36Sopenharmony_ci return q->flags & TAP_VNET_LE || 8562306a36Sopenharmony_ci tap_legacy_is_little_endian(q); 8662306a36Sopenharmony_ci} 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_cistatic inline u16 tap16_to_cpu(struct tap_queue *q, __virtio16 val) 8962306a36Sopenharmony_ci{ 9062306a36Sopenharmony_ci return __virtio16_to_cpu(tap_is_little_endian(q), val); 9162306a36Sopenharmony_ci} 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_cistatic inline __virtio16 cpu_to_tap16(struct tap_queue *q, u16 val) 9462306a36Sopenharmony_ci{ 9562306a36Sopenharmony_ci return __cpu_to_virtio16(tap_is_little_endian(q), val); 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cistatic struct proto tap_proto = { 9962306a36Sopenharmony_ci .name = "tap", 10062306a36Sopenharmony_ci .owner = THIS_MODULE, 10162306a36Sopenharmony_ci .obj_size = sizeof(struct tap_queue), 10262306a36Sopenharmony_ci}; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci#define TAP_NUM_DEVS (1U << MINORBITS) 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_cistatic LIST_HEAD(major_list); 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_cistruct major_info { 10962306a36Sopenharmony_ci struct rcu_head rcu; 11062306a36Sopenharmony_ci dev_t major; 11162306a36Sopenharmony_ci struct idr minor_idr; 11262306a36Sopenharmony_ci spinlock_t minor_lock; 11362306a36Sopenharmony_ci const char *device_name; 11462306a36Sopenharmony_ci struct list_head next; 11562306a36Sopenharmony_ci}; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci#define GOODCOPY_LEN 128 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_cistatic const struct proto_ops tap_socket_ops; 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ci#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) 12262306a36Sopenharmony_ci#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST) 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_cistatic struct tap_dev *tap_dev_get_rcu(const struct net_device *dev) 12562306a36Sopenharmony_ci{ 12662306a36Sopenharmony_ci return rcu_dereference(dev->rx_handler_data); 12762306a36Sopenharmony_ci} 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci/* 13062306a36Sopenharmony_ci * RCU usage: 13162306a36Sopenharmony_ci * The tap_queue and the macvlan_dev are loosely coupled, the 13262306a36Sopenharmony_ci * pointers from one to the other can only be read while rcu_read_lock 13362306a36Sopenharmony_ci * or rtnl is held. 13462306a36Sopenharmony_ci * 13562306a36Sopenharmony_ci * Both the file and the macvlan_dev hold a reference on the tap_queue 13662306a36Sopenharmony_ci * through sock_hold(&q->sk). When the macvlan_dev goes away first, 13762306a36Sopenharmony_ci * q->vlan becomes inaccessible. When the files gets closed, 13862306a36Sopenharmony_ci * tap_get_queue() fails. 13962306a36Sopenharmony_ci * 14062306a36Sopenharmony_ci * There may still be references to the struct sock inside of the 14162306a36Sopenharmony_ci * queue from outbound SKBs, but these never reference back to the 14262306a36Sopenharmony_ci * file or the dev. The data structure is freed through __sk_free 14362306a36Sopenharmony_ci * when both our references and any pending SKBs are gone. 14462306a36Sopenharmony_ci */ 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_cistatic int tap_enable_queue(struct tap_dev *tap, struct file *file, 14762306a36Sopenharmony_ci struct tap_queue *q) 14862306a36Sopenharmony_ci{ 14962306a36Sopenharmony_ci int err = -EINVAL; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci ASSERT_RTNL(); 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci if (q->enabled) 15462306a36Sopenharmony_ci goto out; 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci err = 0; 15762306a36Sopenharmony_ci rcu_assign_pointer(tap->taps[tap->numvtaps], q); 15862306a36Sopenharmony_ci q->queue_index = tap->numvtaps; 15962306a36Sopenharmony_ci q->enabled = true; 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci tap->numvtaps++; 16262306a36Sopenharmony_ciout: 16362306a36Sopenharmony_ci return err; 16462306a36Sopenharmony_ci} 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci/* Requires RTNL */ 16762306a36Sopenharmony_cistatic int tap_set_queue(struct tap_dev *tap, struct file *file, 16862306a36Sopenharmony_ci struct tap_queue *q) 16962306a36Sopenharmony_ci{ 17062306a36Sopenharmony_ci if (tap->numqueues == MAX_TAP_QUEUES) 17162306a36Sopenharmony_ci return -EBUSY; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci rcu_assign_pointer(q->tap, tap); 17462306a36Sopenharmony_ci rcu_assign_pointer(tap->taps[tap->numvtaps], q); 17562306a36Sopenharmony_ci sock_hold(&q->sk); 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci q->file = file; 17862306a36Sopenharmony_ci q->queue_index = tap->numvtaps; 17962306a36Sopenharmony_ci q->enabled = true; 18062306a36Sopenharmony_ci file->private_data = q; 18162306a36Sopenharmony_ci list_add_tail(&q->next, &tap->queue_list); 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci tap->numvtaps++; 18462306a36Sopenharmony_ci tap->numqueues++; 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci return 0; 18762306a36Sopenharmony_ci} 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_cistatic int tap_disable_queue(struct tap_queue *q) 19062306a36Sopenharmony_ci{ 19162306a36Sopenharmony_ci struct tap_dev *tap; 19262306a36Sopenharmony_ci struct tap_queue *nq; 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci ASSERT_RTNL(); 19562306a36Sopenharmony_ci if (!q->enabled) 19662306a36Sopenharmony_ci return -EINVAL; 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci tap = rtnl_dereference(q->tap); 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci if (tap) { 20162306a36Sopenharmony_ci int index = q->queue_index; 20262306a36Sopenharmony_ci BUG_ON(index >= tap->numvtaps); 20362306a36Sopenharmony_ci nq = rtnl_dereference(tap->taps[tap->numvtaps - 1]); 20462306a36Sopenharmony_ci nq->queue_index = index; 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci rcu_assign_pointer(tap->taps[index], nq); 20762306a36Sopenharmony_ci RCU_INIT_POINTER(tap->taps[tap->numvtaps - 1], NULL); 20862306a36Sopenharmony_ci q->enabled = false; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci tap->numvtaps--; 21162306a36Sopenharmony_ci } 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci return 0; 21462306a36Sopenharmony_ci} 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci/* 21762306a36Sopenharmony_ci * The file owning the queue got closed, give up both 21862306a36Sopenharmony_ci * the reference that the files holds as well as the 21962306a36Sopenharmony_ci * one from the macvlan_dev if that still exists. 22062306a36Sopenharmony_ci * 22162306a36Sopenharmony_ci * Using the spinlock makes sure that we don't get 22262306a36Sopenharmony_ci * to the queue again after destroying it. 22362306a36Sopenharmony_ci */ 22462306a36Sopenharmony_cistatic void tap_put_queue(struct tap_queue *q) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci struct tap_dev *tap; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci rtnl_lock(); 22962306a36Sopenharmony_ci tap = rtnl_dereference(q->tap); 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci if (tap) { 23262306a36Sopenharmony_ci if (q->enabled) 23362306a36Sopenharmony_ci BUG_ON(tap_disable_queue(q)); 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci tap->numqueues--; 23662306a36Sopenharmony_ci RCU_INIT_POINTER(q->tap, NULL); 23762306a36Sopenharmony_ci sock_put(&q->sk); 23862306a36Sopenharmony_ci list_del_init(&q->next); 23962306a36Sopenharmony_ci } 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci rtnl_unlock(); 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci synchronize_rcu(); 24462306a36Sopenharmony_ci sock_put(&q->sk); 24562306a36Sopenharmony_ci} 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci/* 24862306a36Sopenharmony_ci * Select a queue based on the rxq of the device on which this packet 24962306a36Sopenharmony_ci * arrived. If the incoming device is not mq, calculate a flow hash 25062306a36Sopenharmony_ci * to select a queue. If all fails, find the first available queue. 25162306a36Sopenharmony_ci * Cache vlan->numvtaps since it can become zero during the execution 25262306a36Sopenharmony_ci * of this function. 25362306a36Sopenharmony_ci */ 25462306a36Sopenharmony_cistatic struct tap_queue *tap_get_queue(struct tap_dev *tap, 25562306a36Sopenharmony_ci struct sk_buff *skb) 25662306a36Sopenharmony_ci{ 25762306a36Sopenharmony_ci struct tap_queue *queue = NULL; 25862306a36Sopenharmony_ci /* Access to taps array is protected by rcu, but access to numvtaps 25962306a36Sopenharmony_ci * isn't. Below we use it to lookup a queue, but treat it as a hint 26062306a36Sopenharmony_ci * and validate that the result isn't NULL - in case we are 26162306a36Sopenharmony_ci * racing against queue removal. 26262306a36Sopenharmony_ci */ 26362306a36Sopenharmony_ci int numvtaps = READ_ONCE(tap->numvtaps); 26462306a36Sopenharmony_ci __u32 rxq; 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci if (!numvtaps) 26762306a36Sopenharmony_ci goto out; 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci if (numvtaps == 1) 27062306a36Sopenharmony_ci goto single; 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci /* Check if we can use flow to select a queue */ 27362306a36Sopenharmony_ci rxq = skb_get_hash(skb); 27462306a36Sopenharmony_ci if (rxq) { 27562306a36Sopenharmony_ci queue = rcu_dereference(tap->taps[rxq % numvtaps]); 27662306a36Sopenharmony_ci goto out; 27762306a36Sopenharmony_ci } 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci if (likely(skb_rx_queue_recorded(skb))) { 28062306a36Sopenharmony_ci rxq = skb_get_rx_queue(skb); 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci while (unlikely(rxq >= numvtaps)) 28362306a36Sopenharmony_ci rxq -= numvtaps; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci queue = rcu_dereference(tap->taps[rxq]); 28662306a36Sopenharmony_ci goto out; 28762306a36Sopenharmony_ci } 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_cisingle: 29062306a36Sopenharmony_ci queue = rcu_dereference(tap->taps[0]); 29162306a36Sopenharmony_ciout: 29262306a36Sopenharmony_ci return queue; 29362306a36Sopenharmony_ci} 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci/* 29662306a36Sopenharmony_ci * The net_device is going away, give up the reference 29762306a36Sopenharmony_ci * that it holds on all queues and safely set the pointer 29862306a36Sopenharmony_ci * from the queues to NULL. 29962306a36Sopenharmony_ci */ 30062306a36Sopenharmony_civoid tap_del_queues(struct tap_dev *tap) 30162306a36Sopenharmony_ci{ 30262306a36Sopenharmony_ci struct tap_queue *q, *tmp; 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci ASSERT_RTNL(); 30562306a36Sopenharmony_ci list_for_each_entry_safe(q, tmp, &tap->queue_list, next) { 30662306a36Sopenharmony_ci list_del_init(&q->next); 30762306a36Sopenharmony_ci RCU_INIT_POINTER(q->tap, NULL); 30862306a36Sopenharmony_ci if (q->enabled) 30962306a36Sopenharmony_ci tap->numvtaps--; 31062306a36Sopenharmony_ci tap->numqueues--; 31162306a36Sopenharmony_ci sock_put(&q->sk); 31262306a36Sopenharmony_ci } 31362306a36Sopenharmony_ci BUG_ON(tap->numvtaps); 31462306a36Sopenharmony_ci BUG_ON(tap->numqueues); 31562306a36Sopenharmony_ci /* guarantee that any future tap_set_queue will fail */ 31662306a36Sopenharmony_ci tap->numvtaps = MAX_TAP_QUEUES; 31762306a36Sopenharmony_ci} 31862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tap_del_queues); 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_cirx_handler_result_t tap_handle_frame(struct sk_buff **pskb) 32162306a36Sopenharmony_ci{ 32262306a36Sopenharmony_ci struct sk_buff *skb = *pskb; 32362306a36Sopenharmony_ci struct net_device *dev = skb->dev; 32462306a36Sopenharmony_ci struct tap_dev *tap; 32562306a36Sopenharmony_ci struct tap_queue *q; 32662306a36Sopenharmony_ci netdev_features_t features = TAP_FEATURES; 32762306a36Sopenharmony_ci enum skb_drop_reason drop_reason; 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci tap = tap_dev_get_rcu(dev); 33062306a36Sopenharmony_ci if (!tap) 33162306a36Sopenharmony_ci return RX_HANDLER_PASS; 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci q = tap_get_queue(tap, skb); 33462306a36Sopenharmony_ci if (!q) 33562306a36Sopenharmony_ci return RX_HANDLER_PASS; 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci skb_push(skb, ETH_HLEN); 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci /* Apply the forward feature mask so that we perform segmentation 34062306a36Sopenharmony_ci * according to users wishes. This only works if VNET_HDR is 34162306a36Sopenharmony_ci * enabled. 34262306a36Sopenharmony_ci */ 34362306a36Sopenharmony_ci if (q->flags & IFF_VNET_HDR) 34462306a36Sopenharmony_ci features |= tap->tap_features; 34562306a36Sopenharmony_ci if (netif_needs_gso(skb, features)) { 34662306a36Sopenharmony_ci struct sk_buff *segs = __skb_gso_segment(skb, features, false); 34762306a36Sopenharmony_ci struct sk_buff *next; 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci if (IS_ERR(segs)) { 35062306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_SKB_GSO_SEG; 35162306a36Sopenharmony_ci goto drop; 35262306a36Sopenharmony_ci } 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci if (!segs) { 35562306a36Sopenharmony_ci if (ptr_ring_produce(&q->ring, skb)) { 35662306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_FULL_RING; 35762306a36Sopenharmony_ci goto drop; 35862306a36Sopenharmony_ci } 35962306a36Sopenharmony_ci goto wake_up; 36062306a36Sopenharmony_ci } 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci consume_skb(skb); 36362306a36Sopenharmony_ci skb_list_walk_safe(segs, skb, next) { 36462306a36Sopenharmony_ci skb_mark_not_on_list(skb); 36562306a36Sopenharmony_ci if (ptr_ring_produce(&q->ring, skb)) { 36662306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_FULL_RING; 36762306a36Sopenharmony_ci kfree_skb_reason(skb, drop_reason); 36862306a36Sopenharmony_ci kfree_skb_list_reason(next, drop_reason); 36962306a36Sopenharmony_ci break; 37062306a36Sopenharmony_ci } 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci } else { 37362306a36Sopenharmony_ci /* If we receive a partial checksum and the tap side 37462306a36Sopenharmony_ci * doesn't support checksum offload, compute the checksum. 37562306a36Sopenharmony_ci * Note: it doesn't matter which checksum feature to 37662306a36Sopenharmony_ci * check, we either support them all or none. 37762306a36Sopenharmony_ci */ 37862306a36Sopenharmony_ci if (skb->ip_summed == CHECKSUM_PARTIAL && 37962306a36Sopenharmony_ci !(features & NETIF_F_CSUM_MASK) && 38062306a36Sopenharmony_ci skb_checksum_help(skb)) { 38162306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_SKB_CSUM; 38262306a36Sopenharmony_ci goto drop; 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci if (ptr_ring_produce(&q->ring, skb)) { 38562306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_FULL_RING; 38662306a36Sopenharmony_ci goto drop; 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci } 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ciwake_up: 39162306a36Sopenharmony_ci wake_up_interruptible_poll(sk_sleep(&q->sk), EPOLLIN | EPOLLRDNORM | EPOLLRDBAND); 39262306a36Sopenharmony_ci return RX_HANDLER_CONSUMED; 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_cidrop: 39562306a36Sopenharmony_ci /* Count errors/drops only here, thus don't care about args. */ 39662306a36Sopenharmony_ci if (tap->count_rx_dropped) 39762306a36Sopenharmony_ci tap->count_rx_dropped(tap); 39862306a36Sopenharmony_ci kfree_skb_reason(skb, drop_reason); 39962306a36Sopenharmony_ci return RX_HANDLER_CONSUMED; 40062306a36Sopenharmony_ci} 40162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tap_handle_frame); 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_cistatic struct major_info *tap_get_major(int major) 40462306a36Sopenharmony_ci{ 40562306a36Sopenharmony_ci struct major_info *tap_major; 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci list_for_each_entry_rcu(tap_major, &major_list, next) { 40862306a36Sopenharmony_ci if (tap_major->major == major) 40962306a36Sopenharmony_ci return tap_major; 41062306a36Sopenharmony_ci } 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci return NULL; 41362306a36Sopenharmony_ci} 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ciint tap_get_minor(dev_t major, struct tap_dev *tap) 41662306a36Sopenharmony_ci{ 41762306a36Sopenharmony_ci int retval = -ENOMEM; 41862306a36Sopenharmony_ci struct major_info *tap_major; 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_ci rcu_read_lock(); 42162306a36Sopenharmony_ci tap_major = tap_get_major(MAJOR(major)); 42262306a36Sopenharmony_ci if (!tap_major) { 42362306a36Sopenharmony_ci retval = -EINVAL; 42462306a36Sopenharmony_ci goto unlock; 42562306a36Sopenharmony_ci } 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci spin_lock(&tap_major->minor_lock); 42862306a36Sopenharmony_ci retval = idr_alloc(&tap_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_ATOMIC); 42962306a36Sopenharmony_ci if (retval >= 0) { 43062306a36Sopenharmony_ci tap->minor = retval; 43162306a36Sopenharmony_ci } else if (retval == -ENOSPC) { 43262306a36Sopenharmony_ci netdev_err(tap->dev, "Too many tap devices\n"); 43362306a36Sopenharmony_ci retval = -EINVAL; 43462306a36Sopenharmony_ci } 43562306a36Sopenharmony_ci spin_unlock(&tap_major->minor_lock); 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ciunlock: 43862306a36Sopenharmony_ci rcu_read_unlock(); 43962306a36Sopenharmony_ci return retval < 0 ? retval : 0; 44062306a36Sopenharmony_ci} 44162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tap_get_minor); 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_civoid tap_free_minor(dev_t major, struct tap_dev *tap) 44462306a36Sopenharmony_ci{ 44562306a36Sopenharmony_ci struct major_info *tap_major; 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci rcu_read_lock(); 44862306a36Sopenharmony_ci tap_major = tap_get_major(MAJOR(major)); 44962306a36Sopenharmony_ci if (!tap_major) { 45062306a36Sopenharmony_ci goto unlock; 45162306a36Sopenharmony_ci } 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci spin_lock(&tap_major->minor_lock); 45462306a36Sopenharmony_ci if (tap->minor) { 45562306a36Sopenharmony_ci idr_remove(&tap_major->minor_idr, tap->minor); 45662306a36Sopenharmony_ci tap->minor = 0; 45762306a36Sopenharmony_ci } 45862306a36Sopenharmony_ci spin_unlock(&tap_major->minor_lock); 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ciunlock: 46162306a36Sopenharmony_ci rcu_read_unlock(); 46262306a36Sopenharmony_ci} 46362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tap_free_minor); 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_cistatic struct tap_dev *dev_get_by_tap_file(int major, int minor) 46662306a36Sopenharmony_ci{ 46762306a36Sopenharmony_ci struct net_device *dev = NULL; 46862306a36Sopenharmony_ci struct tap_dev *tap; 46962306a36Sopenharmony_ci struct major_info *tap_major; 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci rcu_read_lock(); 47262306a36Sopenharmony_ci tap_major = tap_get_major(major); 47362306a36Sopenharmony_ci if (!tap_major) { 47462306a36Sopenharmony_ci tap = NULL; 47562306a36Sopenharmony_ci goto unlock; 47662306a36Sopenharmony_ci } 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci spin_lock(&tap_major->minor_lock); 47962306a36Sopenharmony_ci tap = idr_find(&tap_major->minor_idr, minor); 48062306a36Sopenharmony_ci if (tap) { 48162306a36Sopenharmony_ci dev = tap->dev; 48262306a36Sopenharmony_ci dev_hold(dev); 48362306a36Sopenharmony_ci } 48462306a36Sopenharmony_ci spin_unlock(&tap_major->minor_lock); 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ciunlock: 48762306a36Sopenharmony_ci rcu_read_unlock(); 48862306a36Sopenharmony_ci return tap; 48962306a36Sopenharmony_ci} 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_cistatic void tap_sock_write_space(struct sock *sk) 49262306a36Sopenharmony_ci{ 49362306a36Sopenharmony_ci wait_queue_head_t *wqueue; 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci if (!sock_writeable(sk) || 49662306a36Sopenharmony_ci !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) 49762306a36Sopenharmony_ci return; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci wqueue = sk_sleep(sk); 50062306a36Sopenharmony_ci if (wqueue && waitqueue_active(wqueue)) 50162306a36Sopenharmony_ci wake_up_interruptible_poll(wqueue, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); 50262306a36Sopenharmony_ci} 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_cistatic void tap_sock_destruct(struct sock *sk) 50562306a36Sopenharmony_ci{ 50662306a36Sopenharmony_ci struct tap_queue *q = container_of(sk, struct tap_queue, sk); 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci ptr_ring_cleanup(&q->ring, __skb_array_destroy_skb); 50962306a36Sopenharmony_ci} 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_cistatic int tap_open(struct inode *inode, struct file *file) 51262306a36Sopenharmony_ci{ 51362306a36Sopenharmony_ci struct net *net = current->nsproxy->net_ns; 51462306a36Sopenharmony_ci struct tap_dev *tap; 51562306a36Sopenharmony_ci struct tap_queue *q; 51662306a36Sopenharmony_ci int err = -ENODEV; 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci rtnl_lock(); 51962306a36Sopenharmony_ci tap = dev_get_by_tap_file(imajor(inode), iminor(inode)); 52062306a36Sopenharmony_ci if (!tap) 52162306a36Sopenharmony_ci goto err; 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci err = -ENOMEM; 52462306a36Sopenharmony_ci q = (struct tap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, 52562306a36Sopenharmony_ci &tap_proto, 0); 52662306a36Sopenharmony_ci if (!q) 52762306a36Sopenharmony_ci goto err; 52862306a36Sopenharmony_ci if (ptr_ring_init(&q->ring, tap->dev->tx_queue_len, GFP_KERNEL)) { 52962306a36Sopenharmony_ci sk_free(&q->sk); 53062306a36Sopenharmony_ci goto err; 53162306a36Sopenharmony_ci } 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci init_waitqueue_head(&q->sock.wq.wait); 53462306a36Sopenharmony_ci q->sock.type = SOCK_RAW; 53562306a36Sopenharmony_ci q->sock.state = SS_CONNECTED; 53662306a36Sopenharmony_ci q->sock.file = file; 53762306a36Sopenharmony_ci q->sock.ops = &tap_socket_ops; 53862306a36Sopenharmony_ci sock_init_data_uid(&q->sock, &q->sk, current_fsuid()); 53962306a36Sopenharmony_ci q->sk.sk_write_space = tap_sock_write_space; 54062306a36Sopenharmony_ci q->sk.sk_destruct = tap_sock_destruct; 54162306a36Sopenharmony_ci q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; 54262306a36Sopenharmony_ci q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci /* 54562306a36Sopenharmony_ci * so far only KVM virtio_net uses tap, enable zero copy between 54662306a36Sopenharmony_ci * guest kernel and host kernel when lower device supports zerocopy 54762306a36Sopenharmony_ci * 54862306a36Sopenharmony_ci * The macvlan supports zerocopy iff the lower device supports zero 54962306a36Sopenharmony_ci * copy so we don't have to look at the lower device directly. 55062306a36Sopenharmony_ci */ 55162306a36Sopenharmony_ci if ((tap->dev->features & NETIF_F_HIGHDMA) && (tap->dev->features & NETIF_F_SG)) 55262306a36Sopenharmony_ci sock_set_flag(&q->sk, SOCK_ZEROCOPY); 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci err = tap_set_queue(tap, file, q); 55562306a36Sopenharmony_ci if (err) { 55662306a36Sopenharmony_ci /* tap_sock_destruct() will take care of freeing ptr_ring */ 55762306a36Sopenharmony_ci goto err_put; 55862306a36Sopenharmony_ci } 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci /* tap groks IOCB_NOWAIT just fine, mark it as such */ 56162306a36Sopenharmony_ci file->f_mode |= FMODE_NOWAIT; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci dev_put(tap->dev); 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci rtnl_unlock(); 56662306a36Sopenharmony_ci return err; 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_cierr_put: 56962306a36Sopenharmony_ci sock_put(&q->sk); 57062306a36Sopenharmony_cierr: 57162306a36Sopenharmony_ci if (tap) 57262306a36Sopenharmony_ci dev_put(tap->dev); 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_ci rtnl_unlock(); 57562306a36Sopenharmony_ci return err; 57662306a36Sopenharmony_ci} 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_cistatic int tap_release(struct inode *inode, struct file *file) 57962306a36Sopenharmony_ci{ 58062306a36Sopenharmony_ci struct tap_queue *q = file->private_data; 58162306a36Sopenharmony_ci tap_put_queue(q); 58262306a36Sopenharmony_ci return 0; 58362306a36Sopenharmony_ci} 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_cistatic __poll_t tap_poll(struct file *file, poll_table *wait) 58662306a36Sopenharmony_ci{ 58762306a36Sopenharmony_ci struct tap_queue *q = file->private_data; 58862306a36Sopenharmony_ci __poll_t mask = EPOLLERR; 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci if (!q) 59162306a36Sopenharmony_ci goto out; 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci mask = 0; 59462306a36Sopenharmony_ci poll_wait(file, &q->sock.wq.wait, wait); 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci if (!ptr_ring_empty(&q->ring)) 59762306a36Sopenharmony_ci mask |= EPOLLIN | EPOLLRDNORM; 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci if (sock_writeable(&q->sk) || 60062306a36Sopenharmony_ci (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) && 60162306a36Sopenharmony_ci sock_writeable(&q->sk))) 60262306a36Sopenharmony_ci mask |= EPOLLOUT | EPOLLWRNORM; 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ciout: 60562306a36Sopenharmony_ci return mask; 60662306a36Sopenharmony_ci} 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_cistatic inline struct sk_buff *tap_alloc_skb(struct sock *sk, size_t prepad, 60962306a36Sopenharmony_ci size_t len, size_t linear, 61062306a36Sopenharmony_ci int noblock, int *err) 61162306a36Sopenharmony_ci{ 61262306a36Sopenharmony_ci struct sk_buff *skb; 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci /* Under a page? Don't bother with paged skb. */ 61562306a36Sopenharmony_ci if (prepad + len < PAGE_SIZE || !linear) 61662306a36Sopenharmony_ci linear = len; 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) 61962306a36Sopenharmony_ci linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER); 62062306a36Sopenharmony_ci skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, 62162306a36Sopenharmony_ci err, PAGE_ALLOC_COSTLY_ORDER); 62262306a36Sopenharmony_ci if (!skb) 62362306a36Sopenharmony_ci return NULL; 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ci skb_reserve(skb, prepad); 62662306a36Sopenharmony_ci skb_put(skb, linear); 62762306a36Sopenharmony_ci skb->data_len = len - linear; 62862306a36Sopenharmony_ci skb->len += len - linear; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci return skb; 63162306a36Sopenharmony_ci} 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci/* Neighbour code has some assumptions on HH_DATA_MOD alignment */ 63462306a36Sopenharmony_ci#define TAP_RESERVE HH_DATA_OFF(ETH_HLEN) 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci/* Get packet from user space buffer */ 63762306a36Sopenharmony_cistatic ssize_t tap_get_user(struct tap_queue *q, void *msg_control, 63862306a36Sopenharmony_ci struct iov_iter *from, int noblock) 63962306a36Sopenharmony_ci{ 64062306a36Sopenharmony_ci int good_linear = SKB_MAX_HEAD(TAP_RESERVE); 64162306a36Sopenharmony_ci struct sk_buff *skb; 64262306a36Sopenharmony_ci struct tap_dev *tap; 64362306a36Sopenharmony_ci unsigned long total_len = iov_iter_count(from); 64462306a36Sopenharmony_ci unsigned long len = total_len; 64562306a36Sopenharmony_ci int err; 64662306a36Sopenharmony_ci struct virtio_net_hdr vnet_hdr = { 0 }; 64762306a36Sopenharmony_ci int vnet_hdr_len = 0; 64862306a36Sopenharmony_ci int copylen = 0; 64962306a36Sopenharmony_ci int depth; 65062306a36Sopenharmony_ci bool zerocopy = false; 65162306a36Sopenharmony_ci size_t linear; 65262306a36Sopenharmony_ci enum skb_drop_reason drop_reason; 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci if (q->flags & IFF_VNET_HDR) { 65562306a36Sopenharmony_ci vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci err = -EINVAL; 65862306a36Sopenharmony_ci if (len < vnet_hdr_len) 65962306a36Sopenharmony_ci goto err; 66062306a36Sopenharmony_ci len -= vnet_hdr_len; 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci err = -EFAULT; 66362306a36Sopenharmony_ci if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) 66462306a36Sopenharmony_ci goto err; 66562306a36Sopenharmony_ci iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr)); 66662306a36Sopenharmony_ci if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && 66762306a36Sopenharmony_ci tap16_to_cpu(q, vnet_hdr.csum_start) + 66862306a36Sopenharmony_ci tap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > 66962306a36Sopenharmony_ci tap16_to_cpu(q, vnet_hdr.hdr_len)) 67062306a36Sopenharmony_ci vnet_hdr.hdr_len = cpu_to_tap16(q, 67162306a36Sopenharmony_ci tap16_to_cpu(q, vnet_hdr.csum_start) + 67262306a36Sopenharmony_ci tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); 67362306a36Sopenharmony_ci err = -EINVAL; 67462306a36Sopenharmony_ci if (tap16_to_cpu(q, vnet_hdr.hdr_len) > len) 67562306a36Sopenharmony_ci goto err; 67662306a36Sopenharmony_ci } 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci err = -EINVAL; 67962306a36Sopenharmony_ci if (unlikely(len < ETH_HLEN)) 68062306a36Sopenharmony_ci goto err; 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_ci if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { 68362306a36Sopenharmony_ci struct iov_iter i; 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci copylen = vnet_hdr.hdr_len ? 68662306a36Sopenharmony_ci tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; 68762306a36Sopenharmony_ci if (copylen > good_linear) 68862306a36Sopenharmony_ci copylen = good_linear; 68962306a36Sopenharmony_ci else if (copylen < ETH_HLEN) 69062306a36Sopenharmony_ci copylen = ETH_HLEN; 69162306a36Sopenharmony_ci linear = copylen; 69262306a36Sopenharmony_ci i = *from; 69362306a36Sopenharmony_ci iov_iter_advance(&i, copylen); 69462306a36Sopenharmony_ci if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) 69562306a36Sopenharmony_ci zerocopy = true; 69662306a36Sopenharmony_ci } 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci if (!zerocopy) { 69962306a36Sopenharmony_ci copylen = len; 70062306a36Sopenharmony_ci linear = tap16_to_cpu(q, vnet_hdr.hdr_len); 70162306a36Sopenharmony_ci if (linear > good_linear) 70262306a36Sopenharmony_ci linear = good_linear; 70362306a36Sopenharmony_ci else if (linear < ETH_HLEN) 70462306a36Sopenharmony_ci linear = ETH_HLEN; 70562306a36Sopenharmony_ci } 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci skb = tap_alloc_skb(&q->sk, TAP_RESERVE, copylen, 70862306a36Sopenharmony_ci linear, noblock, &err); 70962306a36Sopenharmony_ci if (!skb) 71062306a36Sopenharmony_ci goto err; 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ci if (zerocopy) 71362306a36Sopenharmony_ci err = zerocopy_sg_from_iter(skb, from); 71462306a36Sopenharmony_ci else 71562306a36Sopenharmony_ci err = skb_copy_datagram_from_iter(skb, 0, from, len); 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_ci if (err) { 71862306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; 71962306a36Sopenharmony_ci goto err_kfree; 72062306a36Sopenharmony_ci } 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci skb_set_network_header(skb, ETH_HLEN); 72362306a36Sopenharmony_ci skb_reset_mac_header(skb); 72462306a36Sopenharmony_ci skb->protocol = eth_hdr(skb)->h_proto; 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ci rcu_read_lock(); 72762306a36Sopenharmony_ci tap = rcu_dereference(q->tap); 72862306a36Sopenharmony_ci if (!tap) { 72962306a36Sopenharmony_ci kfree_skb(skb); 73062306a36Sopenharmony_ci rcu_read_unlock(); 73162306a36Sopenharmony_ci return total_len; 73262306a36Sopenharmony_ci } 73362306a36Sopenharmony_ci skb->dev = tap->dev; 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci if (vnet_hdr_len) { 73662306a36Sopenharmony_ci err = virtio_net_hdr_to_skb(skb, &vnet_hdr, 73762306a36Sopenharmony_ci tap_is_little_endian(q)); 73862306a36Sopenharmony_ci if (err) { 73962306a36Sopenharmony_ci rcu_read_unlock(); 74062306a36Sopenharmony_ci drop_reason = SKB_DROP_REASON_DEV_HDR; 74162306a36Sopenharmony_ci goto err_kfree; 74262306a36Sopenharmony_ci } 74362306a36Sopenharmony_ci } 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci skb_probe_transport_header(skb); 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ci /* Move network header to the right position for VLAN tagged packets */ 74862306a36Sopenharmony_ci if (eth_type_vlan(skb->protocol) && 74962306a36Sopenharmony_ci vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) 75062306a36Sopenharmony_ci skb_set_network_header(skb, depth); 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci /* copy skb_ubuf_info for callback when skb has no error */ 75362306a36Sopenharmony_ci if (zerocopy) { 75462306a36Sopenharmony_ci skb_zcopy_init(skb, msg_control); 75562306a36Sopenharmony_ci } else if (msg_control) { 75662306a36Sopenharmony_ci struct ubuf_info *uarg = msg_control; 75762306a36Sopenharmony_ci uarg->callback(NULL, uarg, false); 75862306a36Sopenharmony_ci } 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_ci dev_queue_xmit(skb); 76162306a36Sopenharmony_ci rcu_read_unlock(); 76262306a36Sopenharmony_ci return total_len; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_cierr_kfree: 76562306a36Sopenharmony_ci kfree_skb_reason(skb, drop_reason); 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_cierr: 76862306a36Sopenharmony_ci rcu_read_lock(); 76962306a36Sopenharmony_ci tap = rcu_dereference(q->tap); 77062306a36Sopenharmony_ci if (tap && tap->count_tx_dropped) 77162306a36Sopenharmony_ci tap->count_tx_dropped(tap); 77262306a36Sopenharmony_ci rcu_read_unlock(); 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_ci return err; 77562306a36Sopenharmony_ci} 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_cistatic ssize_t tap_write_iter(struct kiocb *iocb, struct iov_iter *from) 77862306a36Sopenharmony_ci{ 77962306a36Sopenharmony_ci struct file *file = iocb->ki_filp; 78062306a36Sopenharmony_ci struct tap_queue *q = file->private_data; 78162306a36Sopenharmony_ci int noblock = 0; 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) 78462306a36Sopenharmony_ci noblock = 1; 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ci return tap_get_user(q, NULL, from, noblock); 78762306a36Sopenharmony_ci} 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci/* Put packet to the user space buffer */ 79062306a36Sopenharmony_cistatic ssize_t tap_put_user(struct tap_queue *q, 79162306a36Sopenharmony_ci const struct sk_buff *skb, 79262306a36Sopenharmony_ci struct iov_iter *iter) 79362306a36Sopenharmony_ci{ 79462306a36Sopenharmony_ci int ret; 79562306a36Sopenharmony_ci int vnet_hdr_len = 0; 79662306a36Sopenharmony_ci int vlan_offset = 0; 79762306a36Sopenharmony_ci int total; 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci if (q->flags & IFF_VNET_HDR) { 80062306a36Sopenharmony_ci int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; 80162306a36Sopenharmony_ci struct virtio_net_hdr vnet_hdr; 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); 80462306a36Sopenharmony_ci if (iov_iter_count(iter) < vnet_hdr_len) 80562306a36Sopenharmony_ci return -EINVAL; 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci if (virtio_net_hdr_from_skb(skb, &vnet_hdr, 80862306a36Sopenharmony_ci tap_is_little_endian(q), true, 80962306a36Sopenharmony_ci vlan_hlen)) 81062306a36Sopenharmony_ci BUG(); 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_ci if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) != 81362306a36Sopenharmony_ci sizeof(vnet_hdr)) 81462306a36Sopenharmony_ci return -EFAULT; 81562306a36Sopenharmony_ci 81662306a36Sopenharmony_ci iov_iter_advance(iter, vnet_hdr_len - sizeof(vnet_hdr)); 81762306a36Sopenharmony_ci } 81862306a36Sopenharmony_ci total = vnet_hdr_len; 81962306a36Sopenharmony_ci total += skb->len; 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci if (skb_vlan_tag_present(skb)) { 82262306a36Sopenharmony_ci struct { 82362306a36Sopenharmony_ci __be16 h_vlan_proto; 82462306a36Sopenharmony_ci __be16 h_vlan_TCI; 82562306a36Sopenharmony_ci } veth; 82662306a36Sopenharmony_ci veth.h_vlan_proto = skb->vlan_proto; 82762306a36Sopenharmony_ci veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); 83062306a36Sopenharmony_ci total += VLAN_HLEN; 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset); 83362306a36Sopenharmony_ci if (ret || !iov_iter_count(iter)) 83462306a36Sopenharmony_ci goto done; 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci ret = copy_to_iter(&veth, sizeof(veth), iter); 83762306a36Sopenharmony_ci if (ret != sizeof(veth) || !iov_iter_count(iter)) 83862306a36Sopenharmony_ci goto done; 83962306a36Sopenharmony_ci } 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci ret = skb_copy_datagram_iter(skb, vlan_offset, iter, 84262306a36Sopenharmony_ci skb->len - vlan_offset); 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_cidone: 84562306a36Sopenharmony_ci return ret ? ret : total; 84662306a36Sopenharmony_ci} 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_cistatic ssize_t tap_do_read(struct tap_queue *q, 84962306a36Sopenharmony_ci struct iov_iter *to, 85062306a36Sopenharmony_ci int noblock, struct sk_buff *skb) 85162306a36Sopenharmony_ci{ 85262306a36Sopenharmony_ci DEFINE_WAIT(wait); 85362306a36Sopenharmony_ci ssize_t ret = 0; 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci if (!iov_iter_count(to)) { 85662306a36Sopenharmony_ci kfree_skb(skb); 85762306a36Sopenharmony_ci return 0; 85862306a36Sopenharmony_ci } 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci if (skb) 86162306a36Sopenharmony_ci goto put; 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci while (1) { 86462306a36Sopenharmony_ci if (!noblock) 86562306a36Sopenharmony_ci prepare_to_wait(sk_sleep(&q->sk), &wait, 86662306a36Sopenharmony_ci TASK_INTERRUPTIBLE); 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_ci /* Read frames from the queue */ 86962306a36Sopenharmony_ci skb = ptr_ring_consume(&q->ring); 87062306a36Sopenharmony_ci if (skb) 87162306a36Sopenharmony_ci break; 87262306a36Sopenharmony_ci if (noblock) { 87362306a36Sopenharmony_ci ret = -EAGAIN; 87462306a36Sopenharmony_ci break; 87562306a36Sopenharmony_ci } 87662306a36Sopenharmony_ci if (signal_pending(current)) { 87762306a36Sopenharmony_ci ret = -ERESTARTSYS; 87862306a36Sopenharmony_ci break; 87962306a36Sopenharmony_ci } 88062306a36Sopenharmony_ci /* Nothing to read, let's sleep */ 88162306a36Sopenharmony_ci schedule(); 88262306a36Sopenharmony_ci } 88362306a36Sopenharmony_ci if (!noblock) 88462306a36Sopenharmony_ci finish_wait(sk_sleep(&q->sk), &wait); 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ciput: 88762306a36Sopenharmony_ci if (skb) { 88862306a36Sopenharmony_ci ret = tap_put_user(q, skb, to); 88962306a36Sopenharmony_ci if (unlikely(ret < 0)) 89062306a36Sopenharmony_ci kfree_skb(skb); 89162306a36Sopenharmony_ci else 89262306a36Sopenharmony_ci consume_skb(skb); 89362306a36Sopenharmony_ci } 89462306a36Sopenharmony_ci return ret; 89562306a36Sopenharmony_ci} 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_cistatic ssize_t tap_read_iter(struct kiocb *iocb, struct iov_iter *to) 89862306a36Sopenharmony_ci{ 89962306a36Sopenharmony_ci struct file *file = iocb->ki_filp; 90062306a36Sopenharmony_ci struct tap_queue *q = file->private_data; 90162306a36Sopenharmony_ci ssize_t len = iov_iter_count(to), ret; 90262306a36Sopenharmony_ci int noblock = 0; 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ci if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) 90562306a36Sopenharmony_ci noblock = 1; 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci ret = tap_do_read(q, to, noblock, NULL); 90862306a36Sopenharmony_ci ret = min_t(ssize_t, ret, len); 90962306a36Sopenharmony_ci if (ret > 0) 91062306a36Sopenharmony_ci iocb->ki_pos = ret; 91162306a36Sopenharmony_ci return ret; 91262306a36Sopenharmony_ci} 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_cistatic struct tap_dev *tap_get_tap_dev(struct tap_queue *q) 91562306a36Sopenharmony_ci{ 91662306a36Sopenharmony_ci struct tap_dev *tap; 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci ASSERT_RTNL(); 91962306a36Sopenharmony_ci tap = rtnl_dereference(q->tap); 92062306a36Sopenharmony_ci if (tap) 92162306a36Sopenharmony_ci dev_hold(tap->dev); 92262306a36Sopenharmony_ci 92362306a36Sopenharmony_ci return tap; 92462306a36Sopenharmony_ci} 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_cistatic void tap_put_tap_dev(struct tap_dev *tap) 92762306a36Sopenharmony_ci{ 92862306a36Sopenharmony_ci dev_put(tap->dev); 92962306a36Sopenharmony_ci} 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_cistatic int tap_ioctl_set_queue(struct file *file, unsigned int flags) 93262306a36Sopenharmony_ci{ 93362306a36Sopenharmony_ci struct tap_queue *q = file->private_data; 93462306a36Sopenharmony_ci struct tap_dev *tap; 93562306a36Sopenharmony_ci int ret; 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci tap = tap_get_tap_dev(q); 93862306a36Sopenharmony_ci if (!tap) 93962306a36Sopenharmony_ci return -EINVAL; 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_ci if (flags & IFF_ATTACH_QUEUE) 94262306a36Sopenharmony_ci ret = tap_enable_queue(tap, file, q); 94362306a36Sopenharmony_ci else if (flags & IFF_DETACH_QUEUE) 94462306a36Sopenharmony_ci ret = tap_disable_queue(q); 94562306a36Sopenharmony_ci else 94662306a36Sopenharmony_ci ret = -EINVAL; 94762306a36Sopenharmony_ci 94862306a36Sopenharmony_ci tap_put_tap_dev(tap); 94962306a36Sopenharmony_ci return ret; 95062306a36Sopenharmony_ci} 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_cistatic int set_offload(struct tap_queue *q, unsigned long arg) 95362306a36Sopenharmony_ci{ 95462306a36Sopenharmony_ci struct tap_dev *tap; 95562306a36Sopenharmony_ci netdev_features_t features; 95662306a36Sopenharmony_ci netdev_features_t feature_mask = 0; 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci tap = rtnl_dereference(q->tap); 95962306a36Sopenharmony_ci if (!tap) 96062306a36Sopenharmony_ci return -ENOLINK; 96162306a36Sopenharmony_ci 96262306a36Sopenharmony_ci features = tap->dev->features; 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci if (arg & TUN_F_CSUM) { 96562306a36Sopenharmony_ci feature_mask = NETIF_F_HW_CSUM; 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) { 96862306a36Sopenharmony_ci if (arg & TUN_F_TSO_ECN) 96962306a36Sopenharmony_ci feature_mask |= NETIF_F_TSO_ECN; 97062306a36Sopenharmony_ci if (arg & TUN_F_TSO4) 97162306a36Sopenharmony_ci feature_mask |= NETIF_F_TSO; 97262306a36Sopenharmony_ci if (arg & TUN_F_TSO6) 97362306a36Sopenharmony_ci feature_mask |= NETIF_F_TSO6; 97462306a36Sopenharmony_ci } 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci /* TODO: for now USO4 and USO6 should work simultaneously */ 97762306a36Sopenharmony_ci if ((arg & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6)) 97862306a36Sopenharmony_ci features |= NETIF_F_GSO_UDP_L4; 97962306a36Sopenharmony_ci } 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_ci /* tun/tap driver inverts the usage for TSO offloads, where 98262306a36Sopenharmony_ci * setting the TSO bit means that the userspace wants to 98362306a36Sopenharmony_ci * accept TSO frames and turning it off means that user space 98462306a36Sopenharmony_ci * does not support TSO. 98562306a36Sopenharmony_ci * For tap, we have to invert it to mean the same thing. 98662306a36Sopenharmony_ci * When user space turns off TSO, we turn off GSO/LRO so that 98762306a36Sopenharmony_ci * user-space will not receive TSO frames. 98862306a36Sopenharmony_ci */ 98962306a36Sopenharmony_ci if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6) || 99062306a36Sopenharmony_ci (feature_mask & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6)) 99162306a36Sopenharmony_ci features |= RX_OFFLOADS; 99262306a36Sopenharmony_ci else 99362306a36Sopenharmony_ci features &= ~RX_OFFLOADS; 99462306a36Sopenharmony_ci 99562306a36Sopenharmony_ci /* tap_features are the same as features on tun/tap and 99662306a36Sopenharmony_ci * reflect user expectations. 99762306a36Sopenharmony_ci */ 99862306a36Sopenharmony_ci tap->tap_features = feature_mask; 99962306a36Sopenharmony_ci if (tap->update_features) 100062306a36Sopenharmony_ci tap->update_features(tap, features); 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci return 0; 100362306a36Sopenharmony_ci} 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci/* 100662306a36Sopenharmony_ci * provide compatibility with generic tun/tap interface 100762306a36Sopenharmony_ci */ 100862306a36Sopenharmony_cistatic long tap_ioctl(struct file *file, unsigned int cmd, 100962306a36Sopenharmony_ci unsigned long arg) 101062306a36Sopenharmony_ci{ 101162306a36Sopenharmony_ci struct tap_queue *q = file->private_data; 101262306a36Sopenharmony_ci struct tap_dev *tap; 101362306a36Sopenharmony_ci void __user *argp = (void __user *)arg; 101462306a36Sopenharmony_ci struct ifreq __user *ifr = argp; 101562306a36Sopenharmony_ci unsigned int __user *up = argp; 101662306a36Sopenharmony_ci unsigned short u; 101762306a36Sopenharmony_ci int __user *sp = argp; 101862306a36Sopenharmony_ci struct sockaddr sa; 101962306a36Sopenharmony_ci int s; 102062306a36Sopenharmony_ci int ret; 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci switch (cmd) { 102362306a36Sopenharmony_ci case TUNSETIFF: 102462306a36Sopenharmony_ci /* ignore the name, just look at flags */ 102562306a36Sopenharmony_ci if (get_user(u, &ifr->ifr_flags)) 102662306a36Sopenharmony_ci return -EFAULT; 102762306a36Sopenharmony_ci 102862306a36Sopenharmony_ci ret = 0; 102962306a36Sopenharmony_ci if ((u & ~TAP_IFFEATURES) != (IFF_NO_PI | IFF_TAP)) 103062306a36Sopenharmony_ci ret = -EINVAL; 103162306a36Sopenharmony_ci else 103262306a36Sopenharmony_ci q->flags = (q->flags & ~TAP_IFFEATURES) | u; 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_ci return ret; 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci case TUNGETIFF: 103762306a36Sopenharmony_ci rtnl_lock(); 103862306a36Sopenharmony_ci tap = tap_get_tap_dev(q); 103962306a36Sopenharmony_ci if (!tap) { 104062306a36Sopenharmony_ci rtnl_unlock(); 104162306a36Sopenharmony_ci return -ENOLINK; 104262306a36Sopenharmony_ci } 104362306a36Sopenharmony_ci 104462306a36Sopenharmony_ci ret = 0; 104562306a36Sopenharmony_ci u = q->flags; 104662306a36Sopenharmony_ci if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || 104762306a36Sopenharmony_ci put_user(u, &ifr->ifr_flags)) 104862306a36Sopenharmony_ci ret = -EFAULT; 104962306a36Sopenharmony_ci tap_put_tap_dev(tap); 105062306a36Sopenharmony_ci rtnl_unlock(); 105162306a36Sopenharmony_ci return ret; 105262306a36Sopenharmony_ci 105362306a36Sopenharmony_ci case TUNSETQUEUE: 105462306a36Sopenharmony_ci if (get_user(u, &ifr->ifr_flags)) 105562306a36Sopenharmony_ci return -EFAULT; 105662306a36Sopenharmony_ci rtnl_lock(); 105762306a36Sopenharmony_ci ret = tap_ioctl_set_queue(file, u); 105862306a36Sopenharmony_ci rtnl_unlock(); 105962306a36Sopenharmony_ci return ret; 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_ci case TUNGETFEATURES: 106262306a36Sopenharmony_ci if (put_user(IFF_TAP | IFF_NO_PI | TAP_IFFEATURES, up)) 106362306a36Sopenharmony_ci return -EFAULT; 106462306a36Sopenharmony_ci return 0; 106562306a36Sopenharmony_ci 106662306a36Sopenharmony_ci case TUNSETSNDBUF: 106762306a36Sopenharmony_ci if (get_user(s, sp)) 106862306a36Sopenharmony_ci return -EFAULT; 106962306a36Sopenharmony_ci if (s <= 0) 107062306a36Sopenharmony_ci return -EINVAL; 107162306a36Sopenharmony_ci 107262306a36Sopenharmony_ci q->sk.sk_sndbuf = s; 107362306a36Sopenharmony_ci return 0; 107462306a36Sopenharmony_ci 107562306a36Sopenharmony_ci case TUNGETVNETHDRSZ: 107662306a36Sopenharmony_ci s = q->vnet_hdr_sz; 107762306a36Sopenharmony_ci if (put_user(s, sp)) 107862306a36Sopenharmony_ci return -EFAULT; 107962306a36Sopenharmony_ci return 0; 108062306a36Sopenharmony_ci 108162306a36Sopenharmony_ci case TUNSETVNETHDRSZ: 108262306a36Sopenharmony_ci if (get_user(s, sp)) 108362306a36Sopenharmony_ci return -EFAULT; 108462306a36Sopenharmony_ci if (s < (int)sizeof(struct virtio_net_hdr)) 108562306a36Sopenharmony_ci return -EINVAL; 108662306a36Sopenharmony_ci 108762306a36Sopenharmony_ci q->vnet_hdr_sz = s; 108862306a36Sopenharmony_ci return 0; 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_ci case TUNGETVNETLE: 109162306a36Sopenharmony_ci s = !!(q->flags & TAP_VNET_LE); 109262306a36Sopenharmony_ci if (put_user(s, sp)) 109362306a36Sopenharmony_ci return -EFAULT; 109462306a36Sopenharmony_ci return 0; 109562306a36Sopenharmony_ci 109662306a36Sopenharmony_ci case TUNSETVNETLE: 109762306a36Sopenharmony_ci if (get_user(s, sp)) 109862306a36Sopenharmony_ci return -EFAULT; 109962306a36Sopenharmony_ci if (s) 110062306a36Sopenharmony_ci q->flags |= TAP_VNET_LE; 110162306a36Sopenharmony_ci else 110262306a36Sopenharmony_ci q->flags &= ~TAP_VNET_LE; 110362306a36Sopenharmony_ci return 0; 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci case TUNGETVNETBE: 110662306a36Sopenharmony_ci return tap_get_vnet_be(q, sp); 110762306a36Sopenharmony_ci 110862306a36Sopenharmony_ci case TUNSETVNETBE: 110962306a36Sopenharmony_ci return tap_set_vnet_be(q, sp); 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci case TUNSETOFFLOAD: 111262306a36Sopenharmony_ci /* let the user check for future flags */ 111362306a36Sopenharmony_ci if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | 111462306a36Sopenharmony_ci TUN_F_TSO_ECN | TUN_F_UFO | 111562306a36Sopenharmony_ci TUN_F_USO4 | TUN_F_USO6)) 111662306a36Sopenharmony_ci return -EINVAL; 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci rtnl_lock(); 111962306a36Sopenharmony_ci ret = set_offload(q, arg); 112062306a36Sopenharmony_ci rtnl_unlock(); 112162306a36Sopenharmony_ci return ret; 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci case SIOCGIFHWADDR: 112462306a36Sopenharmony_ci rtnl_lock(); 112562306a36Sopenharmony_ci tap = tap_get_tap_dev(q); 112662306a36Sopenharmony_ci if (!tap) { 112762306a36Sopenharmony_ci rtnl_unlock(); 112862306a36Sopenharmony_ci return -ENOLINK; 112962306a36Sopenharmony_ci } 113062306a36Sopenharmony_ci ret = 0; 113162306a36Sopenharmony_ci dev_get_mac_address(&sa, dev_net(tap->dev), tap->dev->name); 113262306a36Sopenharmony_ci if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || 113362306a36Sopenharmony_ci copy_to_user(&ifr->ifr_hwaddr, &sa, sizeof(sa))) 113462306a36Sopenharmony_ci ret = -EFAULT; 113562306a36Sopenharmony_ci tap_put_tap_dev(tap); 113662306a36Sopenharmony_ci rtnl_unlock(); 113762306a36Sopenharmony_ci return ret; 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_ci case SIOCSIFHWADDR: 114062306a36Sopenharmony_ci if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa))) 114162306a36Sopenharmony_ci return -EFAULT; 114262306a36Sopenharmony_ci rtnl_lock(); 114362306a36Sopenharmony_ci tap = tap_get_tap_dev(q); 114462306a36Sopenharmony_ci if (!tap) { 114562306a36Sopenharmony_ci rtnl_unlock(); 114662306a36Sopenharmony_ci return -ENOLINK; 114762306a36Sopenharmony_ci } 114862306a36Sopenharmony_ci ret = dev_set_mac_address_user(tap->dev, &sa, NULL); 114962306a36Sopenharmony_ci tap_put_tap_dev(tap); 115062306a36Sopenharmony_ci rtnl_unlock(); 115162306a36Sopenharmony_ci return ret; 115262306a36Sopenharmony_ci 115362306a36Sopenharmony_ci default: 115462306a36Sopenharmony_ci return -EINVAL; 115562306a36Sopenharmony_ci } 115662306a36Sopenharmony_ci} 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_cistatic const struct file_operations tap_fops = { 115962306a36Sopenharmony_ci .owner = THIS_MODULE, 116062306a36Sopenharmony_ci .open = tap_open, 116162306a36Sopenharmony_ci .release = tap_release, 116262306a36Sopenharmony_ci .read_iter = tap_read_iter, 116362306a36Sopenharmony_ci .write_iter = tap_write_iter, 116462306a36Sopenharmony_ci .poll = tap_poll, 116562306a36Sopenharmony_ci .llseek = no_llseek, 116662306a36Sopenharmony_ci .unlocked_ioctl = tap_ioctl, 116762306a36Sopenharmony_ci .compat_ioctl = compat_ptr_ioctl, 116862306a36Sopenharmony_ci}; 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_cistatic int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) 117162306a36Sopenharmony_ci{ 117262306a36Sopenharmony_ci struct tun_xdp_hdr *hdr = xdp->data_hard_start; 117362306a36Sopenharmony_ci struct virtio_net_hdr *gso = &hdr->gso; 117462306a36Sopenharmony_ci int buflen = hdr->buflen; 117562306a36Sopenharmony_ci int vnet_hdr_len = 0; 117662306a36Sopenharmony_ci struct tap_dev *tap; 117762306a36Sopenharmony_ci struct sk_buff *skb; 117862306a36Sopenharmony_ci int err, depth; 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_ci if (q->flags & IFF_VNET_HDR) 118162306a36Sopenharmony_ci vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci skb = build_skb(xdp->data_hard_start, buflen); 118462306a36Sopenharmony_ci if (!skb) { 118562306a36Sopenharmony_ci err = -ENOMEM; 118662306a36Sopenharmony_ci goto err; 118762306a36Sopenharmony_ci } 118862306a36Sopenharmony_ci 118962306a36Sopenharmony_ci skb_reserve(skb, xdp->data - xdp->data_hard_start); 119062306a36Sopenharmony_ci skb_put(skb, xdp->data_end - xdp->data); 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci skb_set_network_header(skb, ETH_HLEN); 119362306a36Sopenharmony_ci skb_reset_mac_header(skb); 119462306a36Sopenharmony_ci skb->protocol = eth_hdr(skb)->h_proto; 119562306a36Sopenharmony_ci 119662306a36Sopenharmony_ci if (vnet_hdr_len) { 119762306a36Sopenharmony_ci err = virtio_net_hdr_to_skb(skb, gso, tap_is_little_endian(q)); 119862306a36Sopenharmony_ci if (err) 119962306a36Sopenharmony_ci goto err_kfree; 120062306a36Sopenharmony_ci } 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci /* Move network header to the right position for VLAN tagged packets */ 120362306a36Sopenharmony_ci if (eth_type_vlan(skb->protocol) && 120462306a36Sopenharmony_ci vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) 120562306a36Sopenharmony_ci skb_set_network_header(skb, depth); 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci rcu_read_lock(); 120862306a36Sopenharmony_ci tap = rcu_dereference(q->tap); 120962306a36Sopenharmony_ci if (tap) { 121062306a36Sopenharmony_ci skb->dev = tap->dev; 121162306a36Sopenharmony_ci skb_probe_transport_header(skb); 121262306a36Sopenharmony_ci dev_queue_xmit(skb); 121362306a36Sopenharmony_ci } else { 121462306a36Sopenharmony_ci kfree_skb(skb); 121562306a36Sopenharmony_ci } 121662306a36Sopenharmony_ci rcu_read_unlock(); 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci return 0; 121962306a36Sopenharmony_ci 122062306a36Sopenharmony_cierr_kfree: 122162306a36Sopenharmony_ci kfree_skb(skb); 122262306a36Sopenharmony_cierr: 122362306a36Sopenharmony_ci rcu_read_lock(); 122462306a36Sopenharmony_ci tap = rcu_dereference(q->tap); 122562306a36Sopenharmony_ci if (tap && tap->count_tx_dropped) 122662306a36Sopenharmony_ci tap->count_tx_dropped(tap); 122762306a36Sopenharmony_ci rcu_read_unlock(); 122862306a36Sopenharmony_ci return err; 122962306a36Sopenharmony_ci} 123062306a36Sopenharmony_ci 123162306a36Sopenharmony_cistatic int tap_sendmsg(struct socket *sock, struct msghdr *m, 123262306a36Sopenharmony_ci size_t total_len) 123362306a36Sopenharmony_ci{ 123462306a36Sopenharmony_ci struct tap_queue *q = container_of(sock, struct tap_queue, sock); 123562306a36Sopenharmony_ci struct tun_msg_ctl *ctl = m->msg_control; 123662306a36Sopenharmony_ci struct xdp_buff *xdp; 123762306a36Sopenharmony_ci int i; 123862306a36Sopenharmony_ci 123962306a36Sopenharmony_ci if (m->msg_controllen == sizeof(struct tun_msg_ctl) && 124062306a36Sopenharmony_ci ctl && ctl->type == TUN_MSG_PTR) { 124162306a36Sopenharmony_ci for (i = 0; i < ctl->num; i++) { 124262306a36Sopenharmony_ci xdp = &((struct xdp_buff *)ctl->ptr)[i]; 124362306a36Sopenharmony_ci tap_get_user_xdp(q, xdp); 124462306a36Sopenharmony_ci } 124562306a36Sopenharmony_ci return 0; 124662306a36Sopenharmony_ci } 124762306a36Sopenharmony_ci 124862306a36Sopenharmony_ci return tap_get_user(q, ctl ? ctl->ptr : NULL, &m->msg_iter, 124962306a36Sopenharmony_ci m->msg_flags & MSG_DONTWAIT); 125062306a36Sopenharmony_ci} 125162306a36Sopenharmony_ci 125262306a36Sopenharmony_cistatic int tap_recvmsg(struct socket *sock, struct msghdr *m, 125362306a36Sopenharmony_ci size_t total_len, int flags) 125462306a36Sopenharmony_ci{ 125562306a36Sopenharmony_ci struct tap_queue *q = container_of(sock, struct tap_queue, sock); 125662306a36Sopenharmony_ci struct sk_buff *skb = m->msg_control; 125762306a36Sopenharmony_ci int ret; 125862306a36Sopenharmony_ci if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) { 125962306a36Sopenharmony_ci kfree_skb(skb); 126062306a36Sopenharmony_ci return -EINVAL; 126162306a36Sopenharmony_ci } 126262306a36Sopenharmony_ci ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb); 126362306a36Sopenharmony_ci if (ret > total_len) { 126462306a36Sopenharmony_ci m->msg_flags |= MSG_TRUNC; 126562306a36Sopenharmony_ci ret = flags & MSG_TRUNC ? ret : total_len; 126662306a36Sopenharmony_ci } 126762306a36Sopenharmony_ci return ret; 126862306a36Sopenharmony_ci} 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_cistatic int tap_peek_len(struct socket *sock) 127162306a36Sopenharmony_ci{ 127262306a36Sopenharmony_ci struct tap_queue *q = container_of(sock, struct tap_queue, 127362306a36Sopenharmony_ci sock); 127462306a36Sopenharmony_ci return PTR_RING_PEEK_CALL(&q->ring, __skb_array_len_with_tag); 127562306a36Sopenharmony_ci} 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_ci/* Ops structure to mimic raw sockets with tun */ 127862306a36Sopenharmony_cistatic const struct proto_ops tap_socket_ops = { 127962306a36Sopenharmony_ci .sendmsg = tap_sendmsg, 128062306a36Sopenharmony_ci .recvmsg = tap_recvmsg, 128162306a36Sopenharmony_ci .peek_len = tap_peek_len, 128262306a36Sopenharmony_ci}; 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci/* Get an underlying socket object from tun file. Returns error unless file is 128562306a36Sopenharmony_ci * attached to a device. The returned object works like a packet socket, it 128662306a36Sopenharmony_ci * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for 128762306a36Sopenharmony_ci * holding a reference to the file for as long as the socket is in use. */ 128862306a36Sopenharmony_cistruct socket *tap_get_socket(struct file *file) 128962306a36Sopenharmony_ci{ 129062306a36Sopenharmony_ci struct tap_queue *q; 129162306a36Sopenharmony_ci if (file->f_op != &tap_fops) 129262306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 129362306a36Sopenharmony_ci q = file->private_data; 129462306a36Sopenharmony_ci if (!q) 129562306a36Sopenharmony_ci return ERR_PTR(-EBADFD); 129662306a36Sopenharmony_ci return &q->sock; 129762306a36Sopenharmony_ci} 129862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tap_get_socket); 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_cistruct ptr_ring *tap_get_ptr_ring(struct file *file) 130162306a36Sopenharmony_ci{ 130262306a36Sopenharmony_ci struct tap_queue *q; 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci if (file->f_op != &tap_fops) 130562306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 130662306a36Sopenharmony_ci q = file->private_data; 130762306a36Sopenharmony_ci if (!q) 130862306a36Sopenharmony_ci return ERR_PTR(-EBADFD); 130962306a36Sopenharmony_ci return &q->ring; 131062306a36Sopenharmony_ci} 131162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tap_get_ptr_ring); 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ciint tap_queue_resize(struct tap_dev *tap) 131462306a36Sopenharmony_ci{ 131562306a36Sopenharmony_ci struct net_device *dev = tap->dev; 131662306a36Sopenharmony_ci struct tap_queue *q; 131762306a36Sopenharmony_ci struct ptr_ring **rings; 131862306a36Sopenharmony_ci int n = tap->numqueues; 131962306a36Sopenharmony_ci int ret, i = 0; 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ci rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); 132262306a36Sopenharmony_ci if (!rings) 132362306a36Sopenharmony_ci return -ENOMEM; 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_ci list_for_each_entry(q, &tap->queue_list, next) 132662306a36Sopenharmony_ci rings[i++] = &q->ring; 132762306a36Sopenharmony_ci 132862306a36Sopenharmony_ci ret = ptr_ring_resize_multiple(rings, n, 132962306a36Sopenharmony_ci dev->tx_queue_len, GFP_KERNEL, 133062306a36Sopenharmony_ci __skb_array_destroy_skb); 133162306a36Sopenharmony_ci 133262306a36Sopenharmony_ci kfree(rings); 133362306a36Sopenharmony_ci return ret; 133462306a36Sopenharmony_ci} 133562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tap_queue_resize); 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_cistatic int tap_list_add(dev_t major, const char *device_name) 133862306a36Sopenharmony_ci{ 133962306a36Sopenharmony_ci struct major_info *tap_major; 134062306a36Sopenharmony_ci 134162306a36Sopenharmony_ci tap_major = kzalloc(sizeof(*tap_major), GFP_ATOMIC); 134262306a36Sopenharmony_ci if (!tap_major) 134362306a36Sopenharmony_ci return -ENOMEM; 134462306a36Sopenharmony_ci 134562306a36Sopenharmony_ci tap_major->major = MAJOR(major); 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_ci idr_init(&tap_major->minor_idr); 134862306a36Sopenharmony_ci spin_lock_init(&tap_major->minor_lock); 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_ci tap_major->device_name = device_name; 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci list_add_tail_rcu(&tap_major->next, &major_list); 135362306a36Sopenharmony_ci return 0; 135462306a36Sopenharmony_ci} 135562306a36Sopenharmony_ci 135662306a36Sopenharmony_ciint tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, 135762306a36Sopenharmony_ci const char *device_name, struct module *module) 135862306a36Sopenharmony_ci{ 135962306a36Sopenharmony_ci int err; 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); 136262306a36Sopenharmony_ci if (err) 136362306a36Sopenharmony_ci goto out1; 136462306a36Sopenharmony_ci 136562306a36Sopenharmony_ci cdev_init(tap_cdev, &tap_fops); 136662306a36Sopenharmony_ci tap_cdev->owner = module; 136762306a36Sopenharmony_ci err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS); 136862306a36Sopenharmony_ci if (err) 136962306a36Sopenharmony_ci goto out2; 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci err = tap_list_add(*tap_major, device_name); 137262306a36Sopenharmony_ci if (err) 137362306a36Sopenharmony_ci goto out3; 137462306a36Sopenharmony_ci 137562306a36Sopenharmony_ci return 0; 137662306a36Sopenharmony_ci 137762306a36Sopenharmony_ciout3: 137862306a36Sopenharmony_ci cdev_del(tap_cdev); 137962306a36Sopenharmony_ciout2: 138062306a36Sopenharmony_ci unregister_chrdev_region(*tap_major, TAP_NUM_DEVS); 138162306a36Sopenharmony_ciout1: 138262306a36Sopenharmony_ci return err; 138362306a36Sopenharmony_ci} 138462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tap_create_cdev); 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_civoid tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) 138762306a36Sopenharmony_ci{ 138862306a36Sopenharmony_ci struct major_info *tap_major, *tmp; 138962306a36Sopenharmony_ci 139062306a36Sopenharmony_ci cdev_del(tap_cdev); 139162306a36Sopenharmony_ci unregister_chrdev_region(major, TAP_NUM_DEVS); 139262306a36Sopenharmony_ci list_for_each_entry_safe(tap_major, tmp, &major_list, next) { 139362306a36Sopenharmony_ci if (tap_major->major == MAJOR(major)) { 139462306a36Sopenharmony_ci idr_destroy(&tap_major->minor_idr); 139562306a36Sopenharmony_ci list_del_rcu(&tap_major->next); 139662306a36Sopenharmony_ci kfree_rcu(tap_major, rcu); 139762306a36Sopenharmony_ci } 139862306a36Sopenharmony_ci } 139962306a36Sopenharmony_ci} 140062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tap_destroy_cdev); 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_ciMODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>"); 140362306a36Sopenharmony_ciMODULE_AUTHOR("Sainath Grandhi <sainath.grandhi@intel.com>"); 140462306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 1405