162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* Copyright (C) 2009 Red Hat, Inc. 362306a36Sopenharmony_ci * Author: Michael S. Tsirkin <mst@redhat.com> 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * virtio-net server in host kernel. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <linux/compat.h> 962306a36Sopenharmony_ci#include <linux/eventfd.h> 1062306a36Sopenharmony_ci#include <linux/vhost.h> 1162306a36Sopenharmony_ci#include <linux/virtio_net.h> 1262306a36Sopenharmony_ci#include <linux/miscdevice.h> 1362306a36Sopenharmony_ci#include <linux/module.h> 1462306a36Sopenharmony_ci#include <linux/moduleparam.h> 1562306a36Sopenharmony_ci#include <linux/mutex.h> 1662306a36Sopenharmony_ci#include <linux/workqueue.h> 1762306a36Sopenharmony_ci#include <linux/file.h> 1862306a36Sopenharmony_ci#include <linux/slab.h> 1962306a36Sopenharmony_ci#include <linux/sched/clock.h> 2062306a36Sopenharmony_ci#include <linux/sched/signal.h> 2162306a36Sopenharmony_ci#include <linux/vmalloc.h> 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#include <linux/net.h> 2462306a36Sopenharmony_ci#include <linux/if_packet.h> 2562306a36Sopenharmony_ci#include <linux/if_arp.h> 2662306a36Sopenharmony_ci#include <linux/if_tun.h> 2762306a36Sopenharmony_ci#include <linux/if_macvlan.h> 2862306a36Sopenharmony_ci#include <linux/if_tap.h> 2962306a36Sopenharmony_ci#include <linux/if_vlan.h> 3062306a36Sopenharmony_ci#include <linux/skb_array.h> 3162306a36Sopenharmony_ci#include <linux/skbuff.h> 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci#include <net/sock.h> 3462306a36Sopenharmony_ci#include <net/xdp.h> 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci#include "vhost.h" 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_cistatic int experimental_zcopytx = 0; 3962306a36Sopenharmony_cimodule_param(experimental_zcopytx, int, 0444); 4062306a36Sopenharmony_ciMODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" 4162306a36Sopenharmony_ci " 1 -Enable; 0 - Disable"); 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci/* Max number of bytes transferred before requeueing the job. 4462306a36Sopenharmony_ci * Using this limit prevents one virtqueue from starving others. */ 4562306a36Sopenharmony_ci#define VHOST_NET_WEIGHT 0x80000 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci/* Max number of packets transferred before requeueing the job. 4862306a36Sopenharmony_ci * Using this limit prevents one virtqueue from starving others with small 4962306a36Sopenharmony_ci * pkts. 5062306a36Sopenharmony_ci */ 5162306a36Sopenharmony_ci#define VHOST_NET_PKT_WEIGHT 256 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci/* MAX number of TX used buffers for outstanding zerocopy */ 5462306a36Sopenharmony_ci#define VHOST_MAX_PEND 128 5562306a36Sopenharmony_ci#define VHOST_GOODCOPY_LEN 256 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci/* 5862306a36Sopenharmony_ci * For transmit, used buffer len is unused; we override it to track buffer 5962306a36Sopenharmony_ci * status internally; used for zerocopy tx only. 6062306a36Sopenharmony_ci */ 6162306a36Sopenharmony_ci/* Lower device DMA failed */ 6262306a36Sopenharmony_ci#define VHOST_DMA_FAILED_LEN ((__force __virtio32)3) 6362306a36Sopenharmony_ci/* Lower device DMA done */ 6462306a36Sopenharmony_ci#define VHOST_DMA_DONE_LEN ((__force __virtio32)2) 6562306a36Sopenharmony_ci/* Lower device DMA in progress */ 6662306a36Sopenharmony_ci#define VHOST_DMA_IN_PROGRESS ((__force __virtio32)1) 6762306a36Sopenharmony_ci/* Buffer unused */ 6862306a36Sopenharmony_ci#define VHOST_DMA_CLEAR_LEN ((__force __virtio32)0) 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci#define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN) 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_cienum { 7362306a36Sopenharmony_ci VHOST_NET_FEATURES = VHOST_FEATURES | 7462306a36Sopenharmony_ci (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | 7562306a36Sopenharmony_ci (1ULL << VIRTIO_NET_F_MRG_RXBUF) | 7662306a36Sopenharmony_ci (1ULL << VIRTIO_F_ACCESS_PLATFORM) | 7762306a36Sopenharmony_ci (1ULL << VIRTIO_F_RING_RESET) 7862306a36Sopenharmony_ci}; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_cienum { 8162306a36Sopenharmony_ci VHOST_NET_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) 8262306a36Sopenharmony_ci}; 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_cienum { 8562306a36Sopenharmony_ci VHOST_NET_VQ_RX = 0, 8662306a36Sopenharmony_ci VHOST_NET_VQ_TX = 1, 8762306a36Sopenharmony_ci VHOST_NET_VQ_MAX = 2, 8862306a36Sopenharmony_ci}; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_cistruct vhost_net_ubuf_ref { 9162306a36Sopenharmony_ci /* refcount follows semantics similar to kref: 9262306a36Sopenharmony_ci * 0: object is released 9362306a36Sopenharmony_ci * 1: no outstanding ubufs 9462306a36Sopenharmony_ci * >1: outstanding ubufs 9562306a36Sopenharmony_ci */ 9662306a36Sopenharmony_ci atomic_t refcount; 9762306a36Sopenharmony_ci wait_queue_head_t wait; 9862306a36Sopenharmony_ci struct vhost_virtqueue *vq; 9962306a36Sopenharmony_ci}; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci#define VHOST_NET_BATCH 64 10262306a36Sopenharmony_cistruct vhost_net_buf { 10362306a36Sopenharmony_ci void **queue; 10462306a36Sopenharmony_ci int tail; 10562306a36Sopenharmony_ci int head; 10662306a36Sopenharmony_ci}; 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_cistruct vhost_net_virtqueue { 10962306a36Sopenharmony_ci struct vhost_virtqueue vq; 11062306a36Sopenharmony_ci size_t vhost_hlen; 11162306a36Sopenharmony_ci size_t sock_hlen; 11262306a36Sopenharmony_ci /* vhost zerocopy support fields below: */ 11362306a36Sopenharmony_ci /* last used idx for outstanding DMA zerocopy buffers */ 11462306a36Sopenharmony_ci int upend_idx; 11562306a36Sopenharmony_ci /* For TX, first used idx for DMA done zerocopy buffers 11662306a36Sopenharmony_ci * For RX, number of batched heads 11762306a36Sopenharmony_ci */ 11862306a36Sopenharmony_ci int done_idx; 11962306a36Sopenharmony_ci /* Number of XDP frames batched */ 12062306a36Sopenharmony_ci int batched_xdp; 12162306a36Sopenharmony_ci /* an array of userspace buffers info */ 12262306a36Sopenharmony_ci struct ubuf_info_msgzc *ubuf_info; 12362306a36Sopenharmony_ci /* Reference counting for outstanding ubufs. 12462306a36Sopenharmony_ci * Protected by vq mutex. Writers must also take device mutex. */ 12562306a36Sopenharmony_ci struct vhost_net_ubuf_ref *ubufs; 12662306a36Sopenharmony_ci struct ptr_ring *rx_ring; 12762306a36Sopenharmony_ci struct vhost_net_buf rxq; 12862306a36Sopenharmony_ci /* Batched XDP buffs */ 12962306a36Sopenharmony_ci struct xdp_buff *xdp; 13062306a36Sopenharmony_ci}; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_cistruct vhost_net { 13362306a36Sopenharmony_ci struct vhost_dev dev; 13462306a36Sopenharmony_ci struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX]; 13562306a36Sopenharmony_ci struct vhost_poll poll[VHOST_NET_VQ_MAX]; 13662306a36Sopenharmony_ci /* Number of TX recently submitted. 13762306a36Sopenharmony_ci * Protected by tx vq lock. */ 13862306a36Sopenharmony_ci unsigned tx_packets; 13962306a36Sopenharmony_ci /* Number of times zerocopy TX recently failed. 14062306a36Sopenharmony_ci * Protected by tx vq lock. */ 14162306a36Sopenharmony_ci unsigned tx_zcopy_err; 14262306a36Sopenharmony_ci /* Flush in progress. Protected by tx vq lock. */ 14362306a36Sopenharmony_ci bool tx_flush; 14462306a36Sopenharmony_ci /* Private page frag */ 14562306a36Sopenharmony_ci struct page_frag page_frag; 14662306a36Sopenharmony_ci /* Refcount bias of page frag */ 14762306a36Sopenharmony_ci int refcnt_bias; 14862306a36Sopenharmony_ci}; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_cistatic unsigned vhost_net_zcopy_mask __read_mostly; 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_cistatic void *vhost_net_buf_get_ptr(struct vhost_net_buf *rxq) 15362306a36Sopenharmony_ci{ 15462306a36Sopenharmony_ci if (rxq->tail != rxq->head) 15562306a36Sopenharmony_ci return rxq->queue[rxq->head]; 15662306a36Sopenharmony_ci else 15762306a36Sopenharmony_ci return NULL; 15862306a36Sopenharmony_ci} 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_cistatic int vhost_net_buf_get_size(struct vhost_net_buf *rxq) 16162306a36Sopenharmony_ci{ 16262306a36Sopenharmony_ci return rxq->tail - rxq->head; 16362306a36Sopenharmony_ci} 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_cistatic int vhost_net_buf_is_empty(struct vhost_net_buf *rxq) 16662306a36Sopenharmony_ci{ 16762306a36Sopenharmony_ci return rxq->tail == rxq->head; 16862306a36Sopenharmony_ci} 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_cistatic void *vhost_net_buf_consume(struct vhost_net_buf *rxq) 17162306a36Sopenharmony_ci{ 17262306a36Sopenharmony_ci void *ret = vhost_net_buf_get_ptr(rxq); 17362306a36Sopenharmony_ci ++rxq->head; 17462306a36Sopenharmony_ci return ret; 17562306a36Sopenharmony_ci} 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_cistatic int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci struct vhost_net_buf *rxq = &nvq->rxq; 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci rxq->head = 0; 18262306a36Sopenharmony_ci rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue, 18362306a36Sopenharmony_ci VHOST_NET_BATCH); 18462306a36Sopenharmony_ci return rxq->tail; 18562306a36Sopenharmony_ci} 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_cistatic void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq) 18862306a36Sopenharmony_ci{ 18962306a36Sopenharmony_ci struct vhost_net_buf *rxq = &nvq->rxq; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) { 19262306a36Sopenharmony_ci ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head, 19362306a36Sopenharmony_ci vhost_net_buf_get_size(rxq), 19462306a36Sopenharmony_ci tun_ptr_free); 19562306a36Sopenharmony_ci rxq->head = rxq->tail = 0; 19662306a36Sopenharmony_ci } 19762306a36Sopenharmony_ci} 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_cistatic int vhost_net_buf_peek_len(void *ptr) 20062306a36Sopenharmony_ci{ 20162306a36Sopenharmony_ci if (tun_is_xdp_frame(ptr)) { 20262306a36Sopenharmony_ci struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr); 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci return xdpf->len; 20562306a36Sopenharmony_ci } 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci return __skb_array_len_with_tag(ptr); 20862306a36Sopenharmony_ci} 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_cistatic int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) 21162306a36Sopenharmony_ci{ 21262306a36Sopenharmony_ci struct vhost_net_buf *rxq = &nvq->rxq; 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci if (!vhost_net_buf_is_empty(rxq)) 21562306a36Sopenharmony_ci goto out; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci if (!vhost_net_buf_produce(nvq)) 21862306a36Sopenharmony_ci return 0; 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ciout: 22162306a36Sopenharmony_ci return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq)); 22262306a36Sopenharmony_ci} 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_cistatic void vhost_net_buf_init(struct vhost_net_buf *rxq) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci rxq->head = rxq->tail = 0; 22762306a36Sopenharmony_ci} 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_cistatic void vhost_net_enable_zcopy(int vq) 23062306a36Sopenharmony_ci{ 23162306a36Sopenharmony_ci vhost_net_zcopy_mask |= 0x1 << vq; 23262306a36Sopenharmony_ci} 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_cistatic struct vhost_net_ubuf_ref * 23562306a36Sopenharmony_civhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy) 23662306a36Sopenharmony_ci{ 23762306a36Sopenharmony_ci struct vhost_net_ubuf_ref *ubufs; 23862306a36Sopenharmony_ci /* No zero copy backend? Nothing to count. */ 23962306a36Sopenharmony_ci if (!zcopy) 24062306a36Sopenharmony_ci return NULL; 24162306a36Sopenharmony_ci ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL); 24262306a36Sopenharmony_ci if (!ubufs) 24362306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 24462306a36Sopenharmony_ci atomic_set(&ubufs->refcount, 1); 24562306a36Sopenharmony_ci init_waitqueue_head(&ubufs->wait); 24662306a36Sopenharmony_ci ubufs->vq = vq; 24762306a36Sopenharmony_ci return ubufs; 24862306a36Sopenharmony_ci} 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_cistatic int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs) 25162306a36Sopenharmony_ci{ 25262306a36Sopenharmony_ci int r = atomic_sub_return(1, &ubufs->refcount); 25362306a36Sopenharmony_ci if (unlikely(!r)) 25462306a36Sopenharmony_ci wake_up(&ubufs->wait); 25562306a36Sopenharmony_ci return r; 25662306a36Sopenharmony_ci} 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_cistatic void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) 25962306a36Sopenharmony_ci{ 26062306a36Sopenharmony_ci vhost_net_ubuf_put(ubufs); 26162306a36Sopenharmony_ci wait_event(ubufs->wait, !atomic_read(&ubufs->refcount)); 26262306a36Sopenharmony_ci} 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_cistatic void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) 26562306a36Sopenharmony_ci{ 26662306a36Sopenharmony_ci vhost_net_ubuf_put_and_wait(ubufs); 26762306a36Sopenharmony_ci kfree(ubufs); 26862306a36Sopenharmony_ci} 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_cistatic void vhost_net_clear_ubuf_info(struct vhost_net *n) 27162306a36Sopenharmony_ci{ 27262306a36Sopenharmony_ci int i; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { 27562306a36Sopenharmony_ci kfree(n->vqs[i].ubuf_info); 27662306a36Sopenharmony_ci n->vqs[i].ubuf_info = NULL; 27762306a36Sopenharmony_ci } 27862306a36Sopenharmony_ci} 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_cistatic int vhost_net_set_ubuf_info(struct vhost_net *n) 28162306a36Sopenharmony_ci{ 28262306a36Sopenharmony_ci bool zcopy; 28362306a36Sopenharmony_ci int i; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { 28662306a36Sopenharmony_ci zcopy = vhost_net_zcopy_mask & (0x1 << i); 28762306a36Sopenharmony_ci if (!zcopy) 28862306a36Sopenharmony_ci continue; 28962306a36Sopenharmony_ci n->vqs[i].ubuf_info = 29062306a36Sopenharmony_ci kmalloc_array(UIO_MAXIOV, 29162306a36Sopenharmony_ci sizeof(*n->vqs[i].ubuf_info), 29262306a36Sopenharmony_ci GFP_KERNEL); 29362306a36Sopenharmony_ci if (!n->vqs[i].ubuf_info) 29462306a36Sopenharmony_ci goto err; 29562306a36Sopenharmony_ci } 29662306a36Sopenharmony_ci return 0; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_cierr: 29962306a36Sopenharmony_ci vhost_net_clear_ubuf_info(n); 30062306a36Sopenharmony_ci return -ENOMEM; 30162306a36Sopenharmony_ci} 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_cistatic void vhost_net_vq_reset(struct vhost_net *n) 30462306a36Sopenharmony_ci{ 30562306a36Sopenharmony_ci int i; 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci vhost_net_clear_ubuf_info(n); 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci for (i = 0; i < VHOST_NET_VQ_MAX; i++) { 31062306a36Sopenharmony_ci n->vqs[i].done_idx = 0; 31162306a36Sopenharmony_ci n->vqs[i].upend_idx = 0; 31262306a36Sopenharmony_ci n->vqs[i].ubufs = NULL; 31362306a36Sopenharmony_ci n->vqs[i].vhost_hlen = 0; 31462306a36Sopenharmony_ci n->vqs[i].sock_hlen = 0; 31562306a36Sopenharmony_ci vhost_net_buf_init(&n->vqs[i].rxq); 31662306a36Sopenharmony_ci } 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci} 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_cistatic void vhost_net_tx_packet(struct vhost_net *net) 32162306a36Sopenharmony_ci{ 32262306a36Sopenharmony_ci ++net->tx_packets; 32362306a36Sopenharmony_ci if (net->tx_packets < 1024) 32462306a36Sopenharmony_ci return; 32562306a36Sopenharmony_ci net->tx_packets = 0; 32662306a36Sopenharmony_ci net->tx_zcopy_err = 0; 32762306a36Sopenharmony_ci} 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_cistatic void vhost_net_tx_err(struct vhost_net *net) 33062306a36Sopenharmony_ci{ 33162306a36Sopenharmony_ci ++net->tx_zcopy_err; 33262306a36Sopenharmony_ci} 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_cistatic bool vhost_net_tx_select_zcopy(struct vhost_net *net) 33562306a36Sopenharmony_ci{ 33662306a36Sopenharmony_ci /* TX flush waits for outstanding DMAs to be done. 33762306a36Sopenharmony_ci * Don't start new DMAs. 33862306a36Sopenharmony_ci */ 33962306a36Sopenharmony_ci return !net->tx_flush && 34062306a36Sopenharmony_ci net->tx_packets / 64 >= net->tx_zcopy_err; 34162306a36Sopenharmony_ci} 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_cistatic bool vhost_sock_zcopy(struct socket *sock) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci return unlikely(experimental_zcopytx) && 34662306a36Sopenharmony_ci sock_flag(sock->sk, SOCK_ZEROCOPY); 34762306a36Sopenharmony_ci} 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_cistatic bool vhost_sock_xdp(struct socket *sock) 35062306a36Sopenharmony_ci{ 35162306a36Sopenharmony_ci return sock_flag(sock->sk, SOCK_XDP); 35262306a36Sopenharmony_ci} 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci/* In case of DMA done not in order in lower device driver for some reason. 35562306a36Sopenharmony_ci * upend_idx is used to track end of used idx, done_idx is used to track head 35662306a36Sopenharmony_ci * of used idx. Once lower device DMA done contiguously, we will signal KVM 35762306a36Sopenharmony_ci * guest used idx. 35862306a36Sopenharmony_ci */ 35962306a36Sopenharmony_cistatic void vhost_zerocopy_signal_used(struct vhost_net *net, 36062306a36Sopenharmony_ci struct vhost_virtqueue *vq) 36162306a36Sopenharmony_ci{ 36262306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq = 36362306a36Sopenharmony_ci container_of(vq, struct vhost_net_virtqueue, vq); 36462306a36Sopenharmony_ci int i, add; 36562306a36Sopenharmony_ci int j = 0; 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) { 36862306a36Sopenharmony_ci if (vq->heads[i].len == VHOST_DMA_FAILED_LEN) 36962306a36Sopenharmony_ci vhost_net_tx_err(net); 37062306a36Sopenharmony_ci if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { 37162306a36Sopenharmony_ci vq->heads[i].len = VHOST_DMA_CLEAR_LEN; 37262306a36Sopenharmony_ci ++j; 37362306a36Sopenharmony_ci } else 37462306a36Sopenharmony_ci break; 37562306a36Sopenharmony_ci } 37662306a36Sopenharmony_ci while (j) { 37762306a36Sopenharmony_ci add = min(UIO_MAXIOV - nvq->done_idx, j); 37862306a36Sopenharmony_ci vhost_add_used_and_signal_n(vq->dev, vq, 37962306a36Sopenharmony_ci &vq->heads[nvq->done_idx], add); 38062306a36Sopenharmony_ci nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV; 38162306a36Sopenharmony_ci j -= add; 38262306a36Sopenharmony_ci } 38362306a36Sopenharmony_ci} 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_cistatic void vhost_zerocopy_callback(struct sk_buff *skb, 38662306a36Sopenharmony_ci struct ubuf_info *ubuf_base, bool success) 38762306a36Sopenharmony_ci{ 38862306a36Sopenharmony_ci struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base); 38962306a36Sopenharmony_ci struct vhost_net_ubuf_ref *ubufs = ubuf->ctx; 39062306a36Sopenharmony_ci struct vhost_virtqueue *vq = ubufs->vq; 39162306a36Sopenharmony_ci int cnt; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci rcu_read_lock_bh(); 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci /* set len to mark this desc buffers done DMA */ 39662306a36Sopenharmony_ci vq->heads[ubuf->desc].len = success ? 39762306a36Sopenharmony_ci VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; 39862306a36Sopenharmony_ci cnt = vhost_net_ubuf_put(ubufs); 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci /* 40162306a36Sopenharmony_ci * Trigger polling thread if guest stopped submitting new buffers: 40262306a36Sopenharmony_ci * in this case, the refcount after decrement will eventually reach 1. 40362306a36Sopenharmony_ci * We also trigger polling periodically after each 16 packets 40462306a36Sopenharmony_ci * (the value 16 here is more or less arbitrary, it's tuned to trigger 40562306a36Sopenharmony_ci * less than 10% of times). 40662306a36Sopenharmony_ci */ 40762306a36Sopenharmony_ci if (cnt <= 1 || !(cnt % 16)) 40862306a36Sopenharmony_ci vhost_poll_queue(&vq->poll); 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci rcu_read_unlock_bh(); 41162306a36Sopenharmony_ci} 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_cistatic inline unsigned long busy_clock(void) 41462306a36Sopenharmony_ci{ 41562306a36Sopenharmony_ci return local_clock() >> 10; 41662306a36Sopenharmony_ci} 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_cistatic bool vhost_can_busy_poll(unsigned long endtime) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci return likely(!need_resched() && !time_after(busy_clock(), endtime) && 42162306a36Sopenharmony_ci !signal_pending(current)); 42262306a36Sopenharmony_ci} 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_cistatic void vhost_net_disable_vq(struct vhost_net *n, 42562306a36Sopenharmony_ci struct vhost_virtqueue *vq) 42662306a36Sopenharmony_ci{ 42762306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq = 42862306a36Sopenharmony_ci container_of(vq, struct vhost_net_virtqueue, vq); 42962306a36Sopenharmony_ci struct vhost_poll *poll = n->poll + (nvq - n->vqs); 43062306a36Sopenharmony_ci if (!vhost_vq_get_backend(vq)) 43162306a36Sopenharmony_ci return; 43262306a36Sopenharmony_ci vhost_poll_stop(poll); 43362306a36Sopenharmony_ci} 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_cistatic int vhost_net_enable_vq(struct vhost_net *n, 43662306a36Sopenharmony_ci struct vhost_virtqueue *vq) 43762306a36Sopenharmony_ci{ 43862306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq = 43962306a36Sopenharmony_ci container_of(vq, struct vhost_net_virtqueue, vq); 44062306a36Sopenharmony_ci struct vhost_poll *poll = n->poll + (nvq - n->vqs); 44162306a36Sopenharmony_ci struct socket *sock; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci sock = vhost_vq_get_backend(vq); 44462306a36Sopenharmony_ci if (!sock) 44562306a36Sopenharmony_ci return 0; 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci return vhost_poll_start(poll, sock->file); 44862306a36Sopenharmony_ci} 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_cistatic void vhost_net_signal_used(struct vhost_net_virtqueue *nvq) 45162306a36Sopenharmony_ci{ 45262306a36Sopenharmony_ci struct vhost_virtqueue *vq = &nvq->vq; 45362306a36Sopenharmony_ci struct vhost_dev *dev = vq->dev; 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci if (!nvq->done_idx) 45662306a36Sopenharmony_ci return; 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx); 45962306a36Sopenharmony_ci nvq->done_idx = 0; 46062306a36Sopenharmony_ci} 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_cistatic void vhost_tx_batch(struct vhost_net *net, 46362306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq, 46462306a36Sopenharmony_ci struct socket *sock, 46562306a36Sopenharmony_ci struct msghdr *msghdr) 46662306a36Sopenharmony_ci{ 46762306a36Sopenharmony_ci struct tun_msg_ctl ctl = { 46862306a36Sopenharmony_ci .type = TUN_MSG_PTR, 46962306a36Sopenharmony_ci .num = nvq->batched_xdp, 47062306a36Sopenharmony_ci .ptr = nvq->xdp, 47162306a36Sopenharmony_ci }; 47262306a36Sopenharmony_ci int i, err; 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci if (nvq->batched_xdp == 0) 47562306a36Sopenharmony_ci goto signal_used; 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci msghdr->msg_control = &ctl; 47862306a36Sopenharmony_ci msghdr->msg_controllen = sizeof(ctl); 47962306a36Sopenharmony_ci err = sock->ops->sendmsg(sock, msghdr, 0); 48062306a36Sopenharmony_ci if (unlikely(err < 0)) { 48162306a36Sopenharmony_ci vq_err(&nvq->vq, "Fail to batch sending packets\n"); 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci /* free pages owned by XDP; since this is an unlikely error path, 48462306a36Sopenharmony_ci * keep it simple and avoid more complex bulk update for the 48562306a36Sopenharmony_ci * used pages 48662306a36Sopenharmony_ci */ 48762306a36Sopenharmony_ci for (i = 0; i < nvq->batched_xdp; ++i) 48862306a36Sopenharmony_ci put_page(virt_to_head_page(nvq->xdp[i].data)); 48962306a36Sopenharmony_ci nvq->batched_xdp = 0; 49062306a36Sopenharmony_ci nvq->done_idx = 0; 49162306a36Sopenharmony_ci return; 49262306a36Sopenharmony_ci } 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_cisignal_used: 49562306a36Sopenharmony_ci vhost_net_signal_used(nvq); 49662306a36Sopenharmony_ci nvq->batched_xdp = 0; 49762306a36Sopenharmony_ci} 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_cistatic int sock_has_rx_data(struct socket *sock) 50062306a36Sopenharmony_ci{ 50162306a36Sopenharmony_ci if (unlikely(!sock)) 50262306a36Sopenharmony_ci return 0; 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci if (sock->ops->peek_len) 50562306a36Sopenharmony_ci return sock->ops->peek_len(sock); 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_ci return skb_queue_empty(&sock->sk->sk_receive_queue); 50862306a36Sopenharmony_ci} 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_cistatic void vhost_net_busy_poll_try_queue(struct vhost_net *net, 51162306a36Sopenharmony_ci struct vhost_virtqueue *vq) 51262306a36Sopenharmony_ci{ 51362306a36Sopenharmony_ci if (!vhost_vq_avail_empty(&net->dev, vq)) { 51462306a36Sopenharmony_ci vhost_poll_queue(&vq->poll); 51562306a36Sopenharmony_ci } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { 51662306a36Sopenharmony_ci vhost_disable_notify(&net->dev, vq); 51762306a36Sopenharmony_ci vhost_poll_queue(&vq->poll); 51862306a36Sopenharmony_ci } 51962306a36Sopenharmony_ci} 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_cistatic void vhost_net_busy_poll(struct vhost_net *net, 52262306a36Sopenharmony_ci struct vhost_virtqueue *rvq, 52362306a36Sopenharmony_ci struct vhost_virtqueue *tvq, 52462306a36Sopenharmony_ci bool *busyloop_intr, 52562306a36Sopenharmony_ci bool poll_rx) 52662306a36Sopenharmony_ci{ 52762306a36Sopenharmony_ci unsigned long busyloop_timeout; 52862306a36Sopenharmony_ci unsigned long endtime; 52962306a36Sopenharmony_ci struct socket *sock; 53062306a36Sopenharmony_ci struct vhost_virtqueue *vq = poll_rx ? tvq : rvq; 53162306a36Sopenharmony_ci 53262306a36Sopenharmony_ci /* Try to hold the vq mutex of the paired virtqueue. We can't 53362306a36Sopenharmony_ci * use mutex_lock() here since we could not guarantee a 53462306a36Sopenharmony_ci * consistenet lock ordering. 53562306a36Sopenharmony_ci */ 53662306a36Sopenharmony_ci if (!mutex_trylock(&vq->mutex)) 53762306a36Sopenharmony_ci return; 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci vhost_disable_notify(&net->dev, vq); 54062306a36Sopenharmony_ci sock = vhost_vq_get_backend(rvq); 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci busyloop_timeout = poll_rx ? rvq->busyloop_timeout: 54362306a36Sopenharmony_ci tvq->busyloop_timeout; 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ci preempt_disable(); 54662306a36Sopenharmony_ci endtime = busy_clock() + busyloop_timeout; 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci while (vhost_can_busy_poll(endtime)) { 54962306a36Sopenharmony_ci if (vhost_vq_has_work(vq)) { 55062306a36Sopenharmony_ci *busyloop_intr = true; 55162306a36Sopenharmony_ci break; 55262306a36Sopenharmony_ci } 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci if ((sock_has_rx_data(sock) && 55562306a36Sopenharmony_ci !vhost_vq_avail_empty(&net->dev, rvq)) || 55662306a36Sopenharmony_ci !vhost_vq_avail_empty(&net->dev, tvq)) 55762306a36Sopenharmony_ci break; 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ci cpu_relax(); 56062306a36Sopenharmony_ci } 56162306a36Sopenharmony_ci 56262306a36Sopenharmony_ci preempt_enable(); 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci if (poll_rx || sock_has_rx_data(sock)) 56562306a36Sopenharmony_ci vhost_net_busy_poll_try_queue(net, vq); 56662306a36Sopenharmony_ci else if (!poll_rx) /* On tx here, sock has no rx data. */ 56762306a36Sopenharmony_ci vhost_enable_notify(&net->dev, rvq); 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 57062306a36Sopenharmony_ci} 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_cistatic int vhost_net_tx_get_vq_desc(struct vhost_net *net, 57362306a36Sopenharmony_ci struct vhost_net_virtqueue *tnvq, 57462306a36Sopenharmony_ci unsigned int *out_num, unsigned int *in_num, 57562306a36Sopenharmony_ci struct msghdr *msghdr, bool *busyloop_intr) 57662306a36Sopenharmony_ci{ 57762306a36Sopenharmony_ci struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; 57862306a36Sopenharmony_ci struct vhost_virtqueue *rvq = &rnvq->vq; 57962306a36Sopenharmony_ci struct vhost_virtqueue *tvq = &tnvq->vq; 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci int r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), 58262306a36Sopenharmony_ci out_num, in_num, NULL, NULL); 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci if (r == tvq->num && tvq->busyloop_timeout) { 58562306a36Sopenharmony_ci /* Flush batched packets first */ 58662306a36Sopenharmony_ci if (!vhost_sock_zcopy(vhost_vq_get_backend(tvq))) 58762306a36Sopenharmony_ci vhost_tx_batch(net, tnvq, 58862306a36Sopenharmony_ci vhost_vq_get_backend(tvq), 58962306a36Sopenharmony_ci msghdr); 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false); 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci r = vhost_get_vq_desc(tvq, tvq->iov, ARRAY_SIZE(tvq->iov), 59462306a36Sopenharmony_ci out_num, in_num, NULL, NULL); 59562306a36Sopenharmony_ci } 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci return r; 59862306a36Sopenharmony_ci} 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_cistatic bool vhost_exceeds_maxpend(struct vhost_net *net) 60162306a36Sopenharmony_ci{ 60262306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; 60362306a36Sopenharmony_ci struct vhost_virtqueue *vq = &nvq->vq; 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci return (nvq->upend_idx + UIO_MAXIOV - nvq->done_idx) % UIO_MAXIOV > 60662306a36Sopenharmony_ci min_t(unsigned int, VHOST_MAX_PEND, vq->num >> 2); 60762306a36Sopenharmony_ci} 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_cistatic size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter, 61062306a36Sopenharmony_ci size_t hdr_size, int out) 61162306a36Sopenharmony_ci{ 61262306a36Sopenharmony_ci /* Skip header. TODO: support TSO. */ 61362306a36Sopenharmony_ci size_t len = iov_length(vq->iov, out); 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci iov_iter_init(iter, ITER_SOURCE, vq->iov, out, len); 61662306a36Sopenharmony_ci iov_iter_advance(iter, hdr_size); 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci return iov_iter_count(iter); 61962306a36Sopenharmony_ci} 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_cistatic int get_tx_bufs(struct vhost_net *net, 62262306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq, 62362306a36Sopenharmony_ci struct msghdr *msg, 62462306a36Sopenharmony_ci unsigned int *out, unsigned int *in, 62562306a36Sopenharmony_ci size_t *len, bool *busyloop_intr) 62662306a36Sopenharmony_ci{ 62762306a36Sopenharmony_ci struct vhost_virtqueue *vq = &nvq->vq; 62862306a36Sopenharmony_ci int ret; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci ret = vhost_net_tx_get_vq_desc(net, nvq, out, in, msg, busyloop_intr); 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci if (ret < 0 || ret == vq->num) 63362306a36Sopenharmony_ci return ret; 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci if (*in) { 63662306a36Sopenharmony_ci vq_err(vq, "Unexpected descriptor format for TX: out %d, int %d\n", 63762306a36Sopenharmony_ci *out, *in); 63862306a36Sopenharmony_ci return -EFAULT; 63962306a36Sopenharmony_ci } 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ci /* Sanity check */ 64262306a36Sopenharmony_ci *len = init_iov_iter(vq, &msg->msg_iter, nvq->vhost_hlen, *out); 64362306a36Sopenharmony_ci if (*len == 0) { 64462306a36Sopenharmony_ci vq_err(vq, "Unexpected header len for TX: %zd expected %zd\n", 64562306a36Sopenharmony_ci *len, nvq->vhost_hlen); 64662306a36Sopenharmony_ci return -EFAULT; 64762306a36Sopenharmony_ci } 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci return ret; 65062306a36Sopenharmony_ci} 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_cistatic bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len) 65362306a36Sopenharmony_ci{ 65462306a36Sopenharmony_ci return total_len < VHOST_NET_WEIGHT && 65562306a36Sopenharmony_ci !vhost_vq_avail_empty(vq->dev, vq); 65662306a36Sopenharmony_ci} 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_cistatic bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz, 65962306a36Sopenharmony_ci struct page_frag *pfrag, gfp_t gfp) 66062306a36Sopenharmony_ci{ 66162306a36Sopenharmony_ci if (pfrag->page) { 66262306a36Sopenharmony_ci if (pfrag->offset + sz <= pfrag->size) 66362306a36Sopenharmony_ci return true; 66462306a36Sopenharmony_ci __page_frag_cache_drain(pfrag->page, net->refcnt_bias); 66562306a36Sopenharmony_ci } 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci pfrag->offset = 0; 66862306a36Sopenharmony_ci net->refcnt_bias = 0; 66962306a36Sopenharmony_ci if (SKB_FRAG_PAGE_ORDER) { 67062306a36Sopenharmony_ci /* Avoid direct reclaim but allow kswapd to wake */ 67162306a36Sopenharmony_ci pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | 67262306a36Sopenharmony_ci __GFP_COMP | __GFP_NOWARN | 67362306a36Sopenharmony_ci __GFP_NORETRY, 67462306a36Sopenharmony_ci SKB_FRAG_PAGE_ORDER); 67562306a36Sopenharmony_ci if (likely(pfrag->page)) { 67662306a36Sopenharmony_ci pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; 67762306a36Sopenharmony_ci goto done; 67862306a36Sopenharmony_ci } 67962306a36Sopenharmony_ci } 68062306a36Sopenharmony_ci pfrag->page = alloc_page(gfp); 68162306a36Sopenharmony_ci if (likely(pfrag->page)) { 68262306a36Sopenharmony_ci pfrag->size = PAGE_SIZE; 68362306a36Sopenharmony_ci goto done; 68462306a36Sopenharmony_ci } 68562306a36Sopenharmony_ci return false; 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_cidone: 68862306a36Sopenharmony_ci net->refcnt_bias = USHRT_MAX; 68962306a36Sopenharmony_ci page_ref_add(pfrag->page, USHRT_MAX - 1); 69062306a36Sopenharmony_ci return true; 69162306a36Sopenharmony_ci} 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci#define VHOST_NET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_cistatic int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq, 69662306a36Sopenharmony_ci struct iov_iter *from) 69762306a36Sopenharmony_ci{ 69862306a36Sopenharmony_ci struct vhost_virtqueue *vq = &nvq->vq; 69962306a36Sopenharmony_ci struct vhost_net *net = container_of(vq->dev, struct vhost_net, 70062306a36Sopenharmony_ci dev); 70162306a36Sopenharmony_ci struct socket *sock = vhost_vq_get_backend(vq); 70262306a36Sopenharmony_ci struct page_frag *alloc_frag = &net->page_frag; 70362306a36Sopenharmony_ci struct virtio_net_hdr *gso; 70462306a36Sopenharmony_ci struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp]; 70562306a36Sopenharmony_ci struct tun_xdp_hdr *hdr; 70662306a36Sopenharmony_ci size_t len = iov_iter_count(from); 70762306a36Sopenharmony_ci int headroom = vhost_sock_xdp(sock) ? XDP_PACKET_HEADROOM : 0; 70862306a36Sopenharmony_ci int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 70962306a36Sopenharmony_ci int pad = SKB_DATA_ALIGN(VHOST_NET_RX_PAD + headroom + nvq->sock_hlen); 71062306a36Sopenharmony_ci int sock_hlen = nvq->sock_hlen; 71162306a36Sopenharmony_ci void *buf; 71262306a36Sopenharmony_ci int copied; 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci if (unlikely(len < nvq->sock_hlen)) 71562306a36Sopenharmony_ci return -EFAULT; 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_ci if (SKB_DATA_ALIGN(len + pad) + 71862306a36Sopenharmony_ci SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE) 71962306a36Sopenharmony_ci return -ENOSPC; 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci buflen += SKB_DATA_ALIGN(len + pad); 72262306a36Sopenharmony_ci alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES); 72362306a36Sopenharmony_ci if (unlikely(!vhost_net_page_frag_refill(net, buflen, 72462306a36Sopenharmony_ci alloc_frag, GFP_KERNEL))) 72562306a36Sopenharmony_ci return -ENOMEM; 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset; 72862306a36Sopenharmony_ci copied = copy_page_from_iter(alloc_frag->page, 72962306a36Sopenharmony_ci alloc_frag->offset + 73062306a36Sopenharmony_ci offsetof(struct tun_xdp_hdr, gso), 73162306a36Sopenharmony_ci sock_hlen, from); 73262306a36Sopenharmony_ci if (copied != sock_hlen) 73362306a36Sopenharmony_ci return -EFAULT; 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci hdr = buf; 73662306a36Sopenharmony_ci gso = &hdr->gso; 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci if ((gso->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && 73962306a36Sopenharmony_ci vhost16_to_cpu(vq, gso->csum_start) + 74062306a36Sopenharmony_ci vhost16_to_cpu(vq, gso->csum_offset) + 2 > 74162306a36Sopenharmony_ci vhost16_to_cpu(vq, gso->hdr_len)) { 74262306a36Sopenharmony_ci gso->hdr_len = cpu_to_vhost16(vq, 74362306a36Sopenharmony_ci vhost16_to_cpu(vq, gso->csum_start) + 74462306a36Sopenharmony_ci vhost16_to_cpu(vq, gso->csum_offset) + 2); 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci if (vhost16_to_cpu(vq, gso->hdr_len) > len) 74762306a36Sopenharmony_ci return -EINVAL; 74862306a36Sopenharmony_ci } 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ci len -= sock_hlen; 75162306a36Sopenharmony_ci copied = copy_page_from_iter(alloc_frag->page, 75262306a36Sopenharmony_ci alloc_frag->offset + pad, 75362306a36Sopenharmony_ci len, from); 75462306a36Sopenharmony_ci if (copied != len) 75562306a36Sopenharmony_ci return -EFAULT; 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci xdp_init_buff(xdp, buflen, NULL); 75862306a36Sopenharmony_ci xdp_prepare_buff(xdp, buf, pad, len, true); 75962306a36Sopenharmony_ci hdr->buflen = buflen; 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci --net->refcnt_bias; 76262306a36Sopenharmony_ci alloc_frag->offset += buflen; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci ++nvq->batched_xdp; 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci return 0; 76762306a36Sopenharmony_ci} 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_cistatic void handle_tx_copy(struct vhost_net *net, struct socket *sock) 77062306a36Sopenharmony_ci{ 77162306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; 77262306a36Sopenharmony_ci struct vhost_virtqueue *vq = &nvq->vq; 77362306a36Sopenharmony_ci unsigned out, in; 77462306a36Sopenharmony_ci int head; 77562306a36Sopenharmony_ci struct msghdr msg = { 77662306a36Sopenharmony_ci .msg_name = NULL, 77762306a36Sopenharmony_ci .msg_namelen = 0, 77862306a36Sopenharmony_ci .msg_control = NULL, 77962306a36Sopenharmony_ci .msg_controllen = 0, 78062306a36Sopenharmony_ci .msg_flags = MSG_DONTWAIT, 78162306a36Sopenharmony_ci }; 78262306a36Sopenharmony_ci size_t len, total_len = 0; 78362306a36Sopenharmony_ci int err; 78462306a36Sopenharmony_ci int sent_pkts = 0; 78562306a36Sopenharmony_ci bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX); 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci do { 78862306a36Sopenharmony_ci bool busyloop_intr = false; 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci if (nvq->done_idx == VHOST_NET_BATCH) 79162306a36Sopenharmony_ci vhost_tx_batch(net, nvq, sock, &msg); 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_ci head = get_tx_bufs(net, nvq, &msg, &out, &in, &len, 79462306a36Sopenharmony_ci &busyloop_intr); 79562306a36Sopenharmony_ci /* On error, stop handling until the next kick. */ 79662306a36Sopenharmony_ci if (unlikely(head < 0)) 79762306a36Sopenharmony_ci break; 79862306a36Sopenharmony_ci /* Nothing new? Wait for eventfd to tell us they refilled. */ 79962306a36Sopenharmony_ci if (head == vq->num) { 80062306a36Sopenharmony_ci if (unlikely(busyloop_intr)) { 80162306a36Sopenharmony_ci vhost_poll_queue(&vq->poll); 80262306a36Sopenharmony_ci } else if (unlikely(vhost_enable_notify(&net->dev, 80362306a36Sopenharmony_ci vq))) { 80462306a36Sopenharmony_ci vhost_disable_notify(&net->dev, vq); 80562306a36Sopenharmony_ci continue; 80662306a36Sopenharmony_ci } 80762306a36Sopenharmony_ci break; 80862306a36Sopenharmony_ci } 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci total_len += len; 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_ci /* For simplicity, TX batching is only enabled if 81362306a36Sopenharmony_ci * sndbuf is unlimited. 81462306a36Sopenharmony_ci */ 81562306a36Sopenharmony_ci if (sock_can_batch) { 81662306a36Sopenharmony_ci err = vhost_net_build_xdp(nvq, &msg.msg_iter); 81762306a36Sopenharmony_ci if (!err) { 81862306a36Sopenharmony_ci goto done; 81962306a36Sopenharmony_ci } else if (unlikely(err != -ENOSPC)) { 82062306a36Sopenharmony_ci vhost_tx_batch(net, nvq, sock, &msg); 82162306a36Sopenharmony_ci vhost_discard_vq_desc(vq, 1); 82262306a36Sopenharmony_ci vhost_net_enable_vq(net, vq); 82362306a36Sopenharmony_ci break; 82462306a36Sopenharmony_ci } 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci /* We can't build XDP buff, go for single 82762306a36Sopenharmony_ci * packet path but let's flush batched 82862306a36Sopenharmony_ci * packets. 82962306a36Sopenharmony_ci */ 83062306a36Sopenharmony_ci vhost_tx_batch(net, nvq, sock, &msg); 83162306a36Sopenharmony_ci msg.msg_control = NULL; 83262306a36Sopenharmony_ci } else { 83362306a36Sopenharmony_ci if (tx_can_batch(vq, total_len)) 83462306a36Sopenharmony_ci msg.msg_flags |= MSG_MORE; 83562306a36Sopenharmony_ci else 83662306a36Sopenharmony_ci msg.msg_flags &= ~MSG_MORE; 83762306a36Sopenharmony_ci } 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci err = sock->ops->sendmsg(sock, &msg, len); 84062306a36Sopenharmony_ci if (unlikely(err < 0)) { 84162306a36Sopenharmony_ci if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) { 84262306a36Sopenharmony_ci vhost_discard_vq_desc(vq, 1); 84362306a36Sopenharmony_ci vhost_net_enable_vq(net, vq); 84462306a36Sopenharmony_ci break; 84562306a36Sopenharmony_ci } 84662306a36Sopenharmony_ci pr_debug("Fail to send packet: err %d", err); 84762306a36Sopenharmony_ci } else if (unlikely(err != len)) 84862306a36Sopenharmony_ci pr_debug("Truncated TX packet: len %d != %zd\n", 84962306a36Sopenharmony_ci err, len); 85062306a36Sopenharmony_cidone: 85162306a36Sopenharmony_ci vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head); 85262306a36Sopenharmony_ci vq->heads[nvq->done_idx].len = 0; 85362306a36Sopenharmony_ci ++nvq->done_idx; 85462306a36Sopenharmony_ci } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); 85562306a36Sopenharmony_ci 85662306a36Sopenharmony_ci vhost_tx_batch(net, nvq, sock, &msg); 85762306a36Sopenharmony_ci} 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_cistatic void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) 86062306a36Sopenharmony_ci{ 86162306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; 86262306a36Sopenharmony_ci struct vhost_virtqueue *vq = &nvq->vq; 86362306a36Sopenharmony_ci unsigned out, in; 86462306a36Sopenharmony_ci int head; 86562306a36Sopenharmony_ci struct msghdr msg = { 86662306a36Sopenharmony_ci .msg_name = NULL, 86762306a36Sopenharmony_ci .msg_namelen = 0, 86862306a36Sopenharmony_ci .msg_control = NULL, 86962306a36Sopenharmony_ci .msg_controllen = 0, 87062306a36Sopenharmony_ci .msg_flags = MSG_DONTWAIT, 87162306a36Sopenharmony_ci }; 87262306a36Sopenharmony_ci struct tun_msg_ctl ctl; 87362306a36Sopenharmony_ci size_t len, total_len = 0; 87462306a36Sopenharmony_ci int err; 87562306a36Sopenharmony_ci struct vhost_net_ubuf_ref *ubufs; 87662306a36Sopenharmony_ci struct ubuf_info_msgzc *ubuf; 87762306a36Sopenharmony_ci bool zcopy_used; 87862306a36Sopenharmony_ci int sent_pkts = 0; 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci do { 88162306a36Sopenharmony_ci bool busyloop_intr; 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci /* Release DMAs done buffers first */ 88462306a36Sopenharmony_ci vhost_zerocopy_signal_used(net, vq); 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci busyloop_intr = false; 88762306a36Sopenharmony_ci head = get_tx_bufs(net, nvq, &msg, &out, &in, &len, 88862306a36Sopenharmony_ci &busyloop_intr); 88962306a36Sopenharmony_ci /* On error, stop handling until the next kick. */ 89062306a36Sopenharmony_ci if (unlikely(head < 0)) 89162306a36Sopenharmony_ci break; 89262306a36Sopenharmony_ci /* Nothing new? Wait for eventfd to tell us they refilled. */ 89362306a36Sopenharmony_ci if (head == vq->num) { 89462306a36Sopenharmony_ci if (unlikely(busyloop_intr)) { 89562306a36Sopenharmony_ci vhost_poll_queue(&vq->poll); 89662306a36Sopenharmony_ci } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { 89762306a36Sopenharmony_ci vhost_disable_notify(&net->dev, vq); 89862306a36Sopenharmony_ci continue; 89962306a36Sopenharmony_ci } 90062306a36Sopenharmony_ci break; 90162306a36Sopenharmony_ci } 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_ci zcopy_used = len >= VHOST_GOODCOPY_LEN 90462306a36Sopenharmony_ci && !vhost_exceeds_maxpend(net) 90562306a36Sopenharmony_ci && vhost_net_tx_select_zcopy(net); 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci /* use msg_control to pass vhost zerocopy ubuf info to skb */ 90862306a36Sopenharmony_ci if (zcopy_used) { 90962306a36Sopenharmony_ci ubuf = nvq->ubuf_info + nvq->upend_idx; 91062306a36Sopenharmony_ci vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); 91162306a36Sopenharmony_ci vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; 91262306a36Sopenharmony_ci ubuf->ctx = nvq->ubufs; 91362306a36Sopenharmony_ci ubuf->desc = nvq->upend_idx; 91462306a36Sopenharmony_ci ubuf->ubuf.callback = vhost_zerocopy_callback; 91562306a36Sopenharmony_ci ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG; 91662306a36Sopenharmony_ci refcount_set(&ubuf->ubuf.refcnt, 1); 91762306a36Sopenharmony_ci msg.msg_control = &ctl; 91862306a36Sopenharmony_ci ctl.type = TUN_MSG_UBUF; 91962306a36Sopenharmony_ci ctl.ptr = &ubuf->ubuf; 92062306a36Sopenharmony_ci msg.msg_controllen = sizeof(ctl); 92162306a36Sopenharmony_ci ubufs = nvq->ubufs; 92262306a36Sopenharmony_ci atomic_inc(&ubufs->refcount); 92362306a36Sopenharmony_ci nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; 92462306a36Sopenharmony_ci } else { 92562306a36Sopenharmony_ci msg.msg_control = NULL; 92662306a36Sopenharmony_ci ubufs = NULL; 92762306a36Sopenharmony_ci } 92862306a36Sopenharmony_ci total_len += len; 92962306a36Sopenharmony_ci if (tx_can_batch(vq, total_len) && 93062306a36Sopenharmony_ci likely(!vhost_exceeds_maxpend(net))) { 93162306a36Sopenharmony_ci msg.msg_flags |= MSG_MORE; 93262306a36Sopenharmony_ci } else { 93362306a36Sopenharmony_ci msg.msg_flags &= ~MSG_MORE; 93462306a36Sopenharmony_ci } 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ci err = sock->ops->sendmsg(sock, &msg, len); 93762306a36Sopenharmony_ci if (unlikely(err < 0)) { 93862306a36Sopenharmony_ci bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS; 93962306a36Sopenharmony_ci 94062306a36Sopenharmony_ci if (zcopy_used) { 94162306a36Sopenharmony_ci if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS) 94262306a36Sopenharmony_ci vhost_net_ubuf_put(ubufs); 94362306a36Sopenharmony_ci if (retry) 94462306a36Sopenharmony_ci nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) 94562306a36Sopenharmony_ci % UIO_MAXIOV; 94662306a36Sopenharmony_ci else 94762306a36Sopenharmony_ci vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; 94862306a36Sopenharmony_ci } 94962306a36Sopenharmony_ci if (retry) { 95062306a36Sopenharmony_ci vhost_discard_vq_desc(vq, 1); 95162306a36Sopenharmony_ci vhost_net_enable_vq(net, vq); 95262306a36Sopenharmony_ci break; 95362306a36Sopenharmony_ci } 95462306a36Sopenharmony_ci pr_debug("Fail to send packet: err %d", err); 95562306a36Sopenharmony_ci } else if (unlikely(err != len)) 95662306a36Sopenharmony_ci pr_debug("Truncated TX packet: " 95762306a36Sopenharmony_ci " len %d != %zd\n", err, len); 95862306a36Sopenharmony_ci if (!zcopy_used) 95962306a36Sopenharmony_ci vhost_add_used_and_signal(&net->dev, vq, head, 0); 96062306a36Sopenharmony_ci else 96162306a36Sopenharmony_ci vhost_zerocopy_signal_used(net, vq); 96262306a36Sopenharmony_ci vhost_net_tx_packet(net); 96362306a36Sopenharmony_ci } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); 96462306a36Sopenharmony_ci} 96562306a36Sopenharmony_ci 96662306a36Sopenharmony_ci/* Expects to be always run from workqueue - which acts as 96762306a36Sopenharmony_ci * read-size critical section for our kind of RCU. */ 96862306a36Sopenharmony_cistatic void handle_tx(struct vhost_net *net) 96962306a36Sopenharmony_ci{ 97062306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; 97162306a36Sopenharmony_ci struct vhost_virtqueue *vq = &nvq->vq; 97262306a36Sopenharmony_ci struct socket *sock; 97362306a36Sopenharmony_ci 97462306a36Sopenharmony_ci mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX); 97562306a36Sopenharmony_ci sock = vhost_vq_get_backend(vq); 97662306a36Sopenharmony_ci if (!sock) 97762306a36Sopenharmony_ci goto out; 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci if (!vq_meta_prefetch(vq)) 98062306a36Sopenharmony_ci goto out; 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci vhost_disable_notify(&net->dev, vq); 98362306a36Sopenharmony_ci vhost_net_disable_vq(net, vq); 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci if (vhost_sock_zcopy(sock)) 98662306a36Sopenharmony_ci handle_tx_zerocopy(net, sock); 98762306a36Sopenharmony_ci else 98862306a36Sopenharmony_ci handle_tx_copy(net, sock); 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_ciout: 99162306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 99262306a36Sopenharmony_ci} 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_cistatic int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) 99562306a36Sopenharmony_ci{ 99662306a36Sopenharmony_ci struct sk_buff *head; 99762306a36Sopenharmony_ci int len = 0; 99862306a36Sopenharmony_ci unsigned long flags; 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_ci if (rvq->rx_ring) 100162306a36Sopenharmony_ci return vhost_net_buf_peek(rvq); 100262306a36Sopenharmony_ci 100362306a36Sopenharmony_ci spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); 100462306a36Sopenharmony_ci head = skb_peek(&sk->sk_receive_queue); 100562306a36Sopenharmony_ci if (likely(head)) { 100662306a36Sopenharmony_ci len = head->len; 100762306a36Sopenharmony_ci if (skb_vlan_tag_present(head)) 100862306a36Sopenharmony_ci len += VLAN_HLEN; 100962306a36Sopenharmony_ci } 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags); 101262306a36Sopenharmony_ci return len; 101362306a36Sopenharmony_ci} 101462306a36Sopenharmony_ci 101562306a36Sopenharmony_cistatic int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk, 101662306a36Sopenharmony_ci bool *busyloop_intr) 101762306a36Sopenharmony_ci{ 101862306a36Sopenharmony_ci struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX]; 101962306a36Sopenharmony_ci struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX]; 102062306a36Sopenharmony_ci struct vhost_virtqueue *rvq = &rnvq->vq; 102162306a36Sopenharmony_ci struct vhost_virtqueue *tvq = &tnvq->vq; 102262306a36Sopenharmony_ci int len = peek_head_len(rnvq, sk); 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci if (!len && rvq->busyloop_timeout) { 102562306a36Sopenharmony_ci /* Flush batched heads first */ 102662306a36Sopenharmony_ci vhost_net_signal_used(rnvq); 102762306a36Sopenharmony_ci /* Both tx vq and rx socket were polled here */ 102862306a36Sopenharmony_ci vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true); 102962306a36Sopenharmony_ci 103062306a36Sopenharmony_ci len = peek_head_len(rnvq, sk); 103162306a36Sopenharmony_ci } 103262306a36Sopenharmony_ci 103362306a36Sopenharmony_ci return len; 103462306a36Sopenharmony_ci} 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci/* This is a multi-buffer version of vhost_get_desc, that works if 103762306a36Sopenharmony_ci * vq has read descriptors only. 103862306a36Sopenharmony_ci * @vq - the relevant virtqueue 103962306a36Sopenharmony_ci * @datalen - data length we'll be reading 104062306a36Sopenharmony_ci * @iovcount - returned count of io vectors we fill 104162306a36Sopenharmony_ci * @log - vhost log 104262306a36Sopenharmony_ci * @log_num - log offset 104362306a36Sopenharmony_ci * @quota - headcount quota, 1 for big buffer 104462306a36Sopenharmony_ci * returns number of buffer heads allocated, negative on error 104562306a36Sopenharmony_ci */ 104662306a36Sopenharmony_cistatic int get_rx_bufs(struct vhost_virtqueue *vq, 104762306a36Sopenharmony_ci struct vring_used_elem *heads, 104862306a36Sopenharmony_ci int datalen, 104962306a36Sopenharmony_ci unsigned *iovcount, 105062306a36Sopenharmony_ci struct vhost_log *log, 105162306a36Sopenharmony_ci unsigned *log_num, 105262306a36Sopenharmony_ci unsigned int quota) 105362306a36Sopenharmony_ci{ 105462306a36Sopenharmony_ci unsigned int out, in; 105562306a36Sopenharmony_ci int seg = 0; 105662306a36Sopenharmony_ci int headcount = 0; 105762306a36Sopenharmony_ci unsigned d; 105862306a36Sopenharmony_ci int r, nlogs = 0; 105962306a36Sopenharmony_ci /* len is always initialized before use since we are always called with 106062306a36Sopenharmony_ci * datalen > 0. 106162306a36Sopenharmony_ci */ 106262306a36Sopenharmony_ci u32 len; 106362306a36Sopenharmony_ci 106462306a36Sopenharmony_ci while (datalen > 0 && headcount < quota) { 106562306a36Sopenharmony_ci if (unlikely(seg >= UIO_MAXIOV)) { 106662306a36Sopenharmony_ci r = -ENOBUFS; 106762306a36Sopenharmony_ci goto err; 106862306a36Sopenharmony_ci } 106962306a36Sopenharmony_ci r = vhost_get_vq_desc(vq, vq->iov + seg, 107062306a36Sopenharmony_ci ARRAY_SIZE(vq->iov) - seg, &out, 107162306a36Sopenharmony_ci &in, log, log_num); 107262306a36Sopenharmony_ci if (unlikely(r < 0)) 107362306a36Sopenharmony_ci goto err; 107462306a36Sopenharmony_ci 107562306a36Sopenharmony_ci d = r; 107662306a36Sopenharmony_ci if (d == vq->num) { 107762306a36Sopenharmony_ci r = 0; 107862306a36Sopenharmony_ci goto err; 107962306a36Sopenharmony_ci } 108062306a36Sopenharmony_ci if (unlikely(out || in <= 0)) { 108162306a36Sopenharmony_ci vq_err(vq, "unexpected descriptor format for RX: " 108262306a36Sopenharmony_ci "out %d, in %d\n", out, in); 108362306a36Sopenharmony_ci r = -EINVAL; 108462306a36Sopenharmony_ci goto err; 108562306a36Sopenharmony_ci } 108662306a36Sopenharmony_ci if (unlikely(log)) { 108762306a36Sopenharmony_ci nlogs += *log_num; 108862306a36Sopenharmony_ci log += *log_num; 108962306a36Sopenharmony_ci } 109062306a36Sopenharmony_ci heads[headcount].id = cpu_to_vhost32(vq, d); 109162306a36Sopenharmony_ci len = iov_length(vq->iov + seg, in); 109262306a36Sopenharmony_ci heads[headcount].len = cpu_to_vhost32(vq, len); 109362306a36Sopenharmony_ci datalen -= len; 109462306a36Sopenharmony_ci ++headcount; 109562306a36Sopenharmony_ci seg += in; 109662306a36Sopenharmony_ci } 109762306a36Sopenharmony_ci heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen); 109862306a36Sopenharmony_ci *iovcount = seg; 109962306a36Sopenharmony_ci if (unlikely(log)) 110062306a36Sopenharmony_ci *log_num = nlogs; 110162306a36Sopenharmony_ci 110262306a36Sopenharmony_ci /* Detect overrun */ 110362306a36Sopenharmony_ci if (unlikely(datalen > 0)) { 110462306a36Sopenharmony_ci r = UIO_MAXIOV + 1; 110562306a36Sopenharmony_ci goto err; 110662306a36Sopenharmony_ci } 110762306a36Sopenharmony_ci return headcount; 110862306a36Sopenharmony_cierr: 110962306a36Sopenharmony_ci vhost_discard_vq_desc(vq, headcount); 111062306a36Sopenharmony_ci return r; 111162306a36Sopenharmony_ci} 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci/* Expects to be always run from workqueue - which acts as 111462306a36Sopenharmony_ci * read-size critical section for our kind of RCU. */ 111562306a36Sopenharmony_cistatic void handle_rx(struct vhost_net *net) 111662306a36Sopenharmony_ci{ 111762306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX]; 111862306a36Sopenharmony_ci struct vhost_virtqueue *vq = &nvq->vq; 111962306a36Sopenharmony_ci unsigned in, log; 112062306a36Sopenharmony_ci struct vhost_log *vq_log; 112162306a36Sopenharmony_ci struct msghdr msg = { 112262306a36Sopenharmony_ci .msg_name = NULL, 112362306a36Sopenharmony_ci .msg_namelen = 0, 112462306a36Sopenharmony_ci .msg_control = NULL, /* FIXME: get and handle RX aux data. */ 112562306a36Sopenharmony_ci .msg_controllen = 0, 112662306a36Sopenharmony_ci .msg_flags = MSG_DONTWAIT, 112762306a36Sopenharmony_ci }; 112862306a36Sopenharmony_ci struct virtio_net_hdr hdr = { 112962306a36Sopenharmony_ci .flags = 0, 113062306a36Sopenharmony_ci .gso_type = VIRTIO_NET_HDR_GSO_NONE 113162306a36Sopenharmony_ci }; 113262306a36Sopenharmony_ci size_t total_len = 0; 113362306a36Sopenharmony_ci int err, mergeable; 113462306a36Sopenharmony_ci s16 headcount; 113562306a36Sopenharmony_ci size_t vhost_hlen, sock_hlen; 113662306a36Sopenharmony_ci size_t vhost_len, sock_len; 113762306a36Sopenharmony_ci bool busyloop_intr = false; 113862306a36Sopenharmony_ci struct socket *sock; 113962306a36Sopenharmony_ci struct iov_iter fixup; 114062306a36Sopenharmony_ci __virtio16 num_buffers; 114162306a36Sopenharmony_ci int recv_pkts = 0; 114262306a36Sopenharmony_ci 114362306a36Sopenharmony_ci mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX); 114462306a36Sopenharmony_ci sock = vhost_vq_get_backend(vq); 114562306a36Sopenharmony_ci if (!sock) 114662306a36Sopenharmony_ci goto out; 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci if (!vq_meta_prefetch(vq)) 114962306a36Sopenharmony_ci goto out; 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci vhost_disable_notify(&net->dev, vq); 115262306a36Sopenharmony_ci vhost_net_disable_vq(net, vq); 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci vhost_hlen = nvq->vhost_hlen; 115562306a36Sopenharmony_ci sock_hlen = nvq->sock_hlen; 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_ci vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ? 115862306a36Sopenharmony_ci vq->log : NULL; 115962306a36Sopenharmony_ci mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci do { 116262306a36Sopenharmony_ci sock_len = vhost_net_rx_peek_head_len(net, sock->sk, 116362306a36Sopenharmony_ci &busyloop_intr); 116462306a36Sopenharmony_ci if (!sock_len) 116562306a36Sopenharmony_ci break; 116662306a36Sopenharmony_ci sock_len += sock_hlen; 116762306a36Sopenharmony_ci vhost_len = sock_len + vhost_hlen; 116862306a36Sopenharmony_ci headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx, 116962306a36Sopenharmony_ci vhost_len, &in, vq_log, &log, 117062306a36Sopenharmony_ci likely(mergeable) ? UIO_MAXIOV : 1); 117162306a36Sopenharmony_ci /* On error, stop handling until the next kick. */ 117262306a36Sopenharmony_ci if (unlikely(headcount < 0)) 117362306a36Sopenharmony_ci goto out; 117462306a36Sopenharmony_ci /* OK, now we need to know about added descriptors. */ 117562306a36Sopenharmony_ci if (!headcount) { 117662306a36Sopenharmony_ci if (unlikely(busyloop_intr)) { 117762306a36Sopenharmony_ci vhost_poll_queue(&vq->poll); 117862306a36Sopenharmony_ci } else if (unlikely(vhost_enable_notify(&net->dev, vq))) { 117962306a36Sopenharmony_ci /* They have slipped one in as we were 118062306a36Sopenharmony_ci * doing that: check again. */ 118162306a36Sopenharmony_ci vhost_disable_notify(&net->dev, vq); 118262306a36Sopenharmony_ci continue; 118362306a36Sopenharmony_ci } 118462306a36Sopenharmony_ci /* Nothing new? Wait for eventfd to tell us 118562306a36Sopenharmony_ci * they refilled. */ 118662306a36Sopenharmony_ci goto out; 118762306a36Sopenharmony_ci } 118862306a36Sopenharmony_ci busyloop_intr = false; 118962306a36Sopenharmony_ci if (nvq->rx_ring) 119062306a36Sopenharmony_ci msg.msg_control = vhost_net_buf_consume(&nvq->rxq); 119162306a36Sopenharmony_ci /* On overrun, truncate and discard */ 119262306a36Sopenharmony_ci if (unlikely(headcount > UIO_MAXIOV)) { 119362306a36Sopenharmony_ci iov_iter_init(&msg.msg_iter, ITER_DEST, vq->iov, 1, 1); 119462306a36Sopenharmony_ci err = sock->ops->recvmsg(sock, &msg, 119562306a36Sopenharmony_ci 1, MSG_DONTWAIT | MSG_TRUNC); 119662306a36Sopenharmony_ci pr_debug("Discarded rx packet: len %zd\n", sock_len); 119762306a36Sopenharmony_ci continue; 119862306a36Sopenharmony_ci } 119962306a36Sopenharmony_ci /* We don't need to be notified again. */ 120062306a36Sopenharmony_ci iov_iter_init(&msg.msg_iter, ITER_DEST, vq->iov, in, vhost_len); 120162306a36Sopenharmony_ci fixup = msg.msg_iter; 120262306a36Sopenharmony_ci if (unlikely((vhost_hlen))) { 120362306a36Sopenharmony_ci /* We will supply the header ourselves 120462306a36Sopenharmony_ci * TODO: support TSO. 120562306a36Sopenharmony_ci */ 120662306a36Sopenharmony_ci iov_iter_advance(&msg.msg_iter, vhost_hlen); 120762306a36Sopenharmony_ci } 120862306a36Sopenharmony_ci err = sock->ops->recvmsg(sock, &msg, 120962306a36Sopenharmony_ci sock_len, MSG_DONTWAIT | MSG_TRUNC); 121062306a36Sopenharmony_ci /* Userspace might have consumed the packet meanwhile: 121162306a36Sopenharmony_ci * it's not supposed to do this usually, but might be hard 121262306a36Sopenharmony_ci * to prevent. Discard data we got (if any) and keep going. */ 121362306a36Sopenharmony_ci if (unlikely(err != sock_len)) { 121462306a36Sopenharmony_ci pr_debug("Discarded rx packet: " 121562306a36Sopenharmony_ci " len %d, expected %zd\n", err, sock_len); 121662306a36Sopenharmony_ci vhost_discard_vq_desc(vq, headcount); 121762306a36Sopenharmony_ci continue; 121862306a36Sopenharmony_ci } 121962306a36Sopenharmony_ci /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */ 122062306a36Sopenharmony_ci if (unlikely(vhost_hlen)) { 122162306a36Sopenharmony_ci if (copy_to_iter(&hdr, sizeof(hdr), 122262306a36Sopenharmony_ci &fixup) != sizeof(hdr)) { 122362306a36Sopenharmony_ci vq_err(vq, "Unable to write vnet_hdr " 122462306a36Sopenharmony_ci "at addr %p\n", vq->iov->iov_base); 122562306a36Sopenharmony_ci goto out; 122662306a36Sopenharmony_ci } 122762306a36Sopenharmony_ci } else { 122862306a36Sopenharmony_ci /* Header came from socket; we'll need to patch 122962306a36Sopenharmony_ci * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF 123062306a36Sopenharmony_ci */ 123162306a36Sopenharmony_ci iov_iter_advance(&fixup, sizeof(hdr)); 123262306a36Sopenharmony_ci } 123362306a36Sopenharmony_ci /* TODO: Should check and handle checksum. */ 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_ci num_buffers = cpu_to_vhost16(vq, headcount); 123662306a36Sopenharmony_ci if (likely(mergeable) && 123762306a36Sopenharmony_ci copy_to_iter(&num_buffers, sizeof num_buffers, 123862306a36Sopenharmony_ci &fixup) != sizeof num_buffers) { 123962306a36Sopenharmony_ci vq_err(vq, "Failed num_buffers write"); 124062306a36Sopenharmony_ci vhost_discard_vq_desc(vq, headcount); 124162306a36Sopenharmony_ci goto out; 124262306a36Sopenharmony_ci } 124362306a36Sopenharmony_ci nvq->done_idx += headcount; 124462306a36Sopenharmony_ci if (nvq->done_idx > VHOST_NET_BATCH) 124562306a36Sopenharmony_ci vhost_net_signal_used(nvq); 124662306a36Sopenharmony_ci if (unlikely(vq_log)) 124762306a36Sopenharmony_ci vhost_log_write(vq, vq_log, log, vhost_len, 124862306a36Sopenharmony_ci vq->iov, in); 124962306a36Sopenharmony_ci total_len += vhost_len; 125062306a36Sopenharmony_ci } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len))); 125162306a36Sopenharmony_ci 125262306a36Sopenharmony_ci if (unlikely(busyloop_intr)) 125362306a36Sopenharmony_ci vhost_poll_queue(&vq->poll); 125462306a36Sopenharmony_ci else if (!sock_len) 125562306a36Sopenharmony_ci vhost_net_enable_vq(net, vq); 125662306a36Sopenharmony_ciout: 125762306a36Sopenharmony_ci vhost_net_signal_used(nvq); 125862306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 125962306a36Sopenharmony_ci} 126062306a36Sopenharmony_ci 126162306a36Sopenharmony_cistatic void handle_tx_kick(struct vhost_work *work) 126262306a36Sopenharmony_ci{ 126362306a36Sopenharmony_ci struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, 126462306a36Sopenharmony_ci poll.work); 126562306a36Sopenharmony_ci struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_ci handle_tx(net); 126862306a36Sopenharmony_ci} 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_cistatic void handle_rx_kick(struct vhost_work *work) 127162306a36Sopenharmony_ci{ 127262306a36Sopenharmony_ci struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, 127362306a36Sopenharmony_ci poll.work); 127462306a36Sopenharmony_ci struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev); 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_ci handle_rx(net); 127762306a36Sopenharmony_ci} 127862306a36Sopenharmony_ci 127962306a36Sopenharmony_cistatic void handle_tx_net(struct vhost_work *work) 128062306a36Sopenharmony_ci{ 128162306a36Sopenharmony_ci struct vhost_net *net = container_of(work, struct vhost_net, 128262306a36Sopenharmony_ci poll[VHOST_NET_VQ_TX].work); 128362306a36Sopenharmony_ci handle_tx(net); 128462306a36Sopenharmony_ci} 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_cistatic void handle_rx_net(struct vhost_work *work) 128762306a36Sopenharmony_ci{ 128862306a36Sopenharmony_ci struct vhost_net *net = container_of(work, struct vhost_net, 128962306a36Sopenharmony_ci poll[VHOST_NET_VQ_RX].work); 129062306a36Sopenharmony_ci handle_rx(net); 129162306a36Sopenharmony_ci} 129262306a36Sopenharmony_ci 129362306a36Sopenharmony_cistatic int vhost_net_open(struct inode *inode, struct file *f) 129462306a36Sopenharmony_ci{ 129562306a36Sopenharmony_ci struct vhost_net *n; 129662306a36Sopenharmony_ci struct vhost_dev *dev; 129762306a36Sopenharmony_ci struct vhost_virtqueue **vqs; 129862306a36Sopenharmony_ci void **queue; 129962306a36Sopenharmony_ci struct xdp_buff *xdp; 130062306a36Sopenharmony_ci int i; 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ci n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL); 130362306a36Sopenharmony_ci if (!n) 130462306a36Sopenharmony_ci return -ENOMEM; 130562306a36Sopenharmony_ci vqs = kmalloc_array(VHOST_NET_VQ_MAX, sizeof(*vqs), GFP_KERNEL); 130662306a36Sopenharmony_ci if (!vqs) { 130762306a36Sopenharmony_ci kvfree(n); 130862306a36Sopenharmony_ci return -ENOMEM; 130962306a36Sopenharmony_ci } 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci queue = kmalloc_array(VHOST_NET_BATCH, sizeof(void *), 131262306a36Sopenharmony_ci GFP_KERNEL); 131362306a36Sopenharmony_ci if (!queue) { 131462306a36Sopenharmony_ci kfree(vqs); 131562306a36Sopenharmony_ci kvfree(n); 131662306a36Sopenharmony_ci return -ENOMEM; 131762306a36Sopenharmony_ci } 131862306a36Sopenharmony_ci n->vqs[VHOST_NET_VQ_RX].rxq.queue = queue; 131962306a36Sopenharmony_ci 132062306a36Sopenharmony_ci xdp = kmalloc_array(VHOST_NET_BATCH, sizeof(*xdp), GFP_KERNEL); 132162306a36Sopenharmony_ci if (!xdp) { 132262306a36Sopenharmony_ci kfree(vqs); 132362306a36Sopenharmony_ci kvfree(n); 132462306a36Sopenharmony_ci kfree(queue); 132562306a36Sopenharmony_ci return -ENOMEM; 132662306a36Sopenharmony_ci } 132762306a36Sopenharmony_ci n->vqs[VHOST_NET_VQ_TX].xdp = xdp; 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_ci dev = &n->dev; 133062306a36Sopenharmony_ci vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq; 133162306a36Sopenharmony_ci vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq; 133262306a36Sopenharmony_ci n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick; 133362306a36Sopenharmony_ci n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick; 133462306a36Sopenharmony_ci for (i = 0; i < VHOST_NET_VQ_MAX; i++) { 133562306a36Sopenharmony_ci n->vqs[i].ubufs = NULL; 133662306a36Sopenharmony_ci n->vqs[i].ubuf_info = NULL; 133762306a36Sopenharmony_ci n->vqs[i].upend_idx = 0; 133862306a36Sopenharmony_ci n->vqs[i].done_idx = 0; 133962306a36Sopenharmony_ci n->vqs[i].batched_xdp = 0; 134062306a36Sopenharmony_ci n->vqs[i].vhost_hlen = 0; 134162306a36Sopenharmony_ci n->vqs[i].sock_hlen = 0; 134262306a36Sopenharmony_ci n->vqs[i].rx_ring = NULL; 134362306a36Sopenharmony_ci vhost_net_buf_init(&n->vqs[i].rxq); 134462306a36Sopenharmony_ci } 134562306a36Sopenharmony_ci vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, 134662306a36Sopenharmony_ci UIO_MAXIOV + VHOST_NET_BATCH, 134762306a36Sopenharmony_ci VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true, 134862306a36Sopenharmony_ci NULL); 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_ci vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev, 135162306a36Sopenharmony_ci vqs[VHOST_NET_VQ_TX]); 135262306a36Sopenharmony_ci vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev, 135362306a36Sopenharmony_ci vqs[VHOST_NET_VQ_RX]); 135462306a36Sopenharmony_ci 135562306a36Sopenharmony_ci f->private_data = n; 135662306a36Sopenharmony_ci n->page_frag.page = NULL; 135762306a36Sopenharmony_ci n->refcnt_bias = 0; 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci return 0; 136062306a36Sopenharmony_ci} 136162306a36Sopenharmony_ci 136262306a36Sopenharmony_cistatic struct socket *vhost_net_stop_vq(struct vhost_net *n, 136362306a36Sopenharmony_ci struct vhost_virtqueue *vq) 136462306a36Sopenharmony_ci{ 136562306a36Sopenharmony_ci struct socket *sock; 136662306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq = 136762306a36Sopenharmony_ci container_of(vq, struct vhost_net_virtqueue, vq); 136862306a36Sopenharmony_ci 136962306a36Sopenharmony_ci mutex_lock(&vq->mutex); 137062306a36Sopenharmony_ci sock = vhost_vq_get_backend(vq); 137162306a36Sopenharmony_ci vhost_net_disable_vq(n, vq); 137262306a36Sopenharmony_ci vhost_vq_set_backend(vq, NULL); 137362306a36Sopenharmony_ci vhost_net_buf_unproduce(nvq); 137462306a36Sopenharmony_ci nvq->rx_ring = NULL; 137562306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 137662306a36Sopenharmony_ci return sock; 137762306a36Sopenharmony_ci} 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_cistatic void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, 138062306a36Sopenharmony_ci struct socket **rx_sock) 138162306a36Sopenharmony_ci{ 138262306a36Sopenharmony_ci *tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq); 138362306a36Sopenharmony_ci *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq); 138462306a36Sopenharmony_ci} 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_cistatic void vhost_net_flush(struct vhost_net *n) 138762306a36Sopenharmony_ci{ 138862306a36Sopenharmony_ci vhost_dev_flush(&n->dev); 138962306a36Sopenharmony_ci if (n->vqs[VHOST_NET_VQ_TX].ubufs) { 139062306a36Sopenharmony_ci mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); 139162306a36Sopenharmony_ci n->tx_flush = true; 139262306a36Sopenharmony_ci mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); 139362306a36Sopenharmony_ci /* Wait for all lower device DMAs done. */ 139462306a36Sopenharmony_ci vhost_net_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs); 139562306a36Sopenharmony_ci mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); 139662306a36Sopenharmony_ci n->tx_flush = false; 139762306a36Sopenharmony_ci atomic_set(&n->vqs[VHOST_NET_VQ_TX].ubufs->refcount, 1); 139862306a36Sopenharmony_ci mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); 139962306a36Sopenharmony_ci } 140062306a36Sopenharmony_ci} 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_cistatic int vhost_net_release(struct inode *inode, struct file *f) 140362306a36Sopenharmony_ci{ 140462306a36Sopenharmony_ci struct vhost_net *n = f->private_data; 140562306a36Sopenharmony_ci struct socket *tx_sock; 140662306a36Sopenharmony_ci struct socket *rx_sock; 140762306a36Sopenharmony_ci 140862306a36Sopenharmony_ci vhost_net_stop(n, &tx_sock, &rx_sock); 140962306a36Sopenharmony_ci vhost_net_flush(n); 141062306a36Sopenharmony_ci vhost_dev_stop(&n->dev); 141162306a36Sopenharmony_ci vhost_dev_cleanup(&n->dev); 141262306a36Sopenharmony_ci vhost_net_vq_reset(n); 141362306a36Sopenharmony_ci if (tx_sock) 141462306a36Sopenharmony_ci sockfd_put(tx_sock); 141562306a36Sopenharmony_ci if (rx_sock) 141662306a36Sopenharmony_ci sockfd_put(rx_sock); 141762306a36Sopenharmony_ci /* Make sure no callbacks are outstanding */ 141862306a36Sopenharmony_ci synchronize_rcu(); 141962306a36Sopenharmony_ci /* We do an extra flush before freeing memory, 142062306a36Sopenharmony_ci * since jobs can re-queue themselves. */ 142162306a36Sopenharmony_ci vhost_net_flush(n); 142262306a36Sopenharmony_ci kfree(n->vqs[VHOST_NET_VQ_RX].rxq.queue); 142362306a36Sopenharmony_ci kfree(n->vqs[VHOST_NET_VQ_TX].xdp); 142462306a36Sopenharmony_ci kfree(n->dev.vqs); 142562306a36Sopenharmony_ci if (n->page_frag.page) 142662306a36Sopenharmony_ci __page_frag_cache_drain(n->page_frag.page, n->refcnt_bias); 142762306a36Sopenharmony_ci kvfree(n); 142862306a36Sopenharmony_ci return 0; 142962306a36Sopenharmony_ci} 143062306a36Sopenharmony_ci 143162306a36Sopenharmony_cistatic struct socket *get_raw_socket(int fd) 143262306a36Sopenharmony_ci{ 143362306a36Sopenharmony_ci int r; 143462306a36Sopenharmony_ci struct socket *sock = sockfd_lookup(fd, &r); 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_ci if (!sock) 143762306a36Sopenharmony_ci return ERR_PTR(-ENOTSOCK); 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci /* Parameter checking */ 144062306a36Sopenharmony_ci if (sock->sk->sk_type != SOCK_RAW) { 144162306a36Sopenharmony_ci r = -ESOCKTNOSUPPORT; 144262306a36Sopenharmony_ci goto err; 144362306a36Sopenharmony_ci } 144462306a36Sopenharmony_ci 144562306a36Sopenharmony_ci if (sock->sk->sk_family != AF_PACKET) { 144662306a36Sopenharmony_ci r = -EPFNOSUPPORT; 144762306a36Sopenharmony_ci goto err; 144862306a36Sopenharmony_ci } 144962306a36Sopenharmony_ci return sock; 145062306a36Sopenharmony_cierr: 145162306a36Sopenharmony_ci sockfd_put(sock); 145262306a36Sopenharmony_ci return ERR_PTR(r); 145362306a36Sopenharmony_ci} 145462306a36Sopenharmony_ci 145562306a36Sopenharmony_cistatic struct ptr_ring *get_tap_ptr_ring(struct file *file) 145662306a36Sopenharmony_ci{ 145762306a36Sopenharmony_ci struct ptr_ring *ring; 145862306a36Sopenharmony_ci ring = tun_get_tx_ring(file); 145962306a36Sopenharmony_ci if (!IS_ERR(ring)) 146062306a36Sopenharmony_ci goto out; 146162306a36Sopenharmony_ci ring = tap_get_ptr_ring(file); 146262306a36Sopenharmony_ci if (!IS_ERR(ring)) 146362306a36Sopenharmony_ci goto out; 146462306a36Sopenharmony_ci ring = NULL; 146562306a36Sopenharmony_ciout: 146662306a36Sopenharmony_ci return ring; 146762306a36Sopenharmony_ci} 146862306a36Sopenharmony_ci 146962306a36Sopenharmony_cistatic struct socket *get_tap_socket(int fd) 147062306a36Sopenharmony_ci{ 147162306a36Sopenharmony_ci struct file *file = fget(fd); 147262306a36Sopenharmony_ci struct socket *sock; 147362306a36Sopenharmony_ci 147462306a36Sopenharmony_ci if (!file) 147562306a36Sopenharmony_ci return ERR_PTR(-EBADF); 147662306a36Sopenharmony_ci sock = tun_get_socket(file); 147762306a36Sopenharmony_ci if (!IS_ERR(sock)) 147862306a36Sopenharmony_ci return sock; 147962306a36Sopenharmony_ci sock = tap_get_socket(file); 148062306a36Sopenharmony_ci if (IS_ERR(sock)) 148162306a36Sopenharmony_ci fput(file); 148262306a36Sopenharmony_ci return sock; 148362306a36Sopenharmony_ci} 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_cistatic struct socket *get_socket(int fd) 148662306a36Sopenharmony_ci{ 148762306a36Sopenharmony_ci struct socket *sock; 148862306a36Sopenharmony_ci 148962306a36Sopenharmony_ci /* special case to disable backend */ 149062306a36Sopenharmony_ci if (fd == -1) 149162306a36Sopenharmony_ci return NULL; 149262306a36Sopenharmony_ci sock = get_raw_socket(fd); 149362306a36Sopenharmony_ci if (!IS_ERR(sock)) 149462306a36Sopenharmony_ci return sock; 149562306a36Sopenharmony_ci sock = get_tap_socket(fd); 149662306a36Sopenharmony_ci if (!IS_ERR(sock)) 149762306a36Sopenharmony_ci return sock; 149862306a36Sopenharmony_ci return ERR_PTR(-ENOTSOCK); 149962306a36Sopenharmony_ci} 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_cistatic long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) 150262306a36Sopenharmony_ci{ 150362306a36Sopenharmony_ci struct socket *sock, *oldsock; 150462306a36Sopenharmony_ci struct vhost_virtqueue *vq; 150562306a36Sopenharmony_ci struct vhost_net_virtqueue *nvq; 150662306a36Sopenharmony_ci struct vhost_net_ubuf_ref *ubufs, *oldubufs = NULL; 150762306a36Sopenharmony_ci int r; 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci mutex_lock(&n->dev.mutex); 151062306a36Sopenharmony_ci r = vhost_dev_check_owner(&n->dev); 151162306a36Sopenharmony_ci if (r) 151262306a36Sopenharmony_ci goto err; 151362306a36Sopenharmony_ci 151462306a36Sopenharmony_ci if (index >= VHOST_NET_VQ_MAX) { 151562306a36Sopenharmony_ci r = -ENOBUFS; 151662306a36Sopenharmony_ci goto err; 151762306a36Sopenharmony_ci } 151862306a36Sopenharmony_ci vq = &n->vqs[index].vq; 151962306a36Sopenharmony_ci nvq = &n->vqs[index]; 152062306a36Sopenharmony_ci mutex_lock(&vq->mutex); 152162306a36Sopenharmony_ci 152262306a36Sopenharmony_ci if (fd == -1) 152362306a36Sopenharmony_ci vhost_clear_msg(&n->dev); 152462306a36Sopenharmony_ci 152562306a36Sopenharmony_ci /* Verify that ring has been setup correctly. */ 152662306a36Sopenharmony_ci if (!vhost_vq_access_ok(vq)) { 152762306a36Sopenharmony_ci r = -EFAULT; 152862306a36Sopenharmony_ci goto err_vq; 152962306a36Sopenharmony_ci } 153062306a36Sopenharmony_ci sock = get_socket(fd); 153162306a36Sopenharmony_ci if (IS_ERR(sock)) { 153262306a36Sopenharmony_ci r = PTR_ERR(sock); 153362306a36Sopenharmony_ci goto err_vq; 153462306a36Sopenharmony_ci } 153562306a36Sopenharmony_ci 153662306a36Sopenharmony_ci /* start polling new socket */ 153762306a36Sopenharmony_ci oldsock = vhost_vq_get_backend(vq); 153862306a36Sopenharmony_ci if (sock != oldsock) { 153962306a36Sopenharmony_ci ubufs = vhost_net_ubuf_alloc(vq, 154062306a36Sopenharmony_ci sock && vhost_sock_zcopy(sock)); 154162306a36Sopenharmony_ci if (IS_ERR(ubufs)) { 154262306a36Sopenharmony_ci r = PTR_ERR(ubufs); 154362306a36Sopenharmony_ci goto err_ubufs; 154462306a36Sopenharmony_ci } 154562306a36Sopenharmony_ci 154662306a36Sopenharmony_ci vhost_net_disable_vq(n, vq); 154762306a36Sopenharmony_ci vhost_vq_set_backend(vq, sock); 154862306a36Sopenharmony_ci vhost_net_buf_unproduce(nvq); 154962306a36Sopenharmony_ci r = vhost_vq_init_access(vq); 155062306a36Sopenharmony_ci if (r) 155162306a36Sopenharmony_ci goto err_used; 155262306a36Sopenharmony_ci r = vhost_net_enable_vq(n, vq); 155362306a36Sopenharmony_ci if (r) 155462306a36Sopenharmony_ci goto err_used; 155562306a36Sopenharmony_ci if (index == VHOST_NET_VQ_RX) { 155662306a36Sopenharmony_ci if (sock) 155762306a36Sopenharmony_ci nvq->rx_ring = get_tap_ptr_ring(sock->file); 155862306a36Sopenharmony_ci else 155962306a36Sopenharmony_ci nvq->rx_ring = NULL; 156062306a36Sopenharmony_ci } 156162306a36Sopenharmony_ci 156262306a36Sopenharmony_ci oldubufs = nvq->ubufs; 156362306a36Sopenharmony_ci nvq->ubufs = ubufs; 156462306a36Sopenharmony_ci 156562306a36Sopenharmony_ci n->tx_packets = 0; 156662306a36Sopenharmony_ci n->tx_zcopy_err = 0; 156762306a36Sopenharmony_ci n->tx_flush = false; 156862306a36Sopenharmony_ci } 156962306a36Sopenharmony_ci 157062306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 157162306a36Sopenharmony_ci 157262306a36Sopenharmony_ci if (oldubufs) { 157362306a36Sopenharmony_ci vhost_net_ubuf_put_wait_and_free(oldubufs); 157462306a36Sopenharmony_ci mutex_lock(&vq->mutex); 157562306a36Sopenharmony_ci vhost_zerocopy_signal_used(n, vq); 157662306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 157762306a36Sopenharmony_ci } 157862306a36Sopenharmony_ci 157962306a36Sopenharmony_ci if (oldsock) { 158062306a36Sopenharmony_ci vhost_dev_flush(&n->dev); 158162306a36Sopenharmony_ci sockfd_put(oldsock); 158262306a36Sopenharmony_ci } 158362306a36Sopenharmony_ci 158462306a36Sopenharmony_ci mutex_unlock(&n->dev.mutex); 158562306a36Sopenharmony_ci return 0; 158662306a36Sopenharmony_ci 158762306a36Sopenharmony_cierr_used: 158862306a36Sopenharmony_ci vhost_vq_set_backend(vq, oldsock); 158962306a36Sopenharmony_ci vhost_net_enable_vq(n, vq); 159062306a36Sopenharmony_ci if (ubufs) 159162306a36Sopenharmony_ci vhost_net_ubuf_put_wait_and_free(ubufs); 159262306a36Sopenharmony_cierr_ubufs: 159362306a36Sopenharmony_ci if (sock) 159462306a36Sopenharmony_ci sockfd_put(sock); 159562306a36Sopenharmony_cierr_vq: 159662306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 159762306a36Sopenharmony_cierr: 159862306a36Sopenharmony_ci mutex_unlock(&n->dev.mutex); 159962306a36Sopenharmony_ci return r; 160062306a36Sopenharmony_ci} 160162306a36Sopenharmony_ci 160262306a36Sopenharmony_cistatic long vhost_net_reset_owner(struct vhost_net *n) 160362306a36Sopenharmony_ci{ 160462306a36Sopenharmony_ci struct socket *tx_sock = NULL; 160562306a36Sopenharmony_ci struct socket *rx_sock = NULL; 160662306a36Sopenharmony_ci long err; 160762306a36Sopenharmony_ci struct vhost_iotlb *umem; 160862306a36Sopenharmony_ci 160962306a36Sopenharmony_ci mutex_lock(&n->dev.mutex); 161062306a36Sopenharmony_ci err = vhost_dev_check_owner(&n->dev); 161162306a36Sopenharmony_ci if (err) 161262306a36Sopenharmony_ci goto done; 161362306a36Sopenharmony_ci umem = vhost_dev_reset_owner_prepare(); 161462306a36Sopenharmony_ci if (!umem) { 161562306a36Sopenharmony_ci err = -ENOMEM; 161662306a36Sopenharmony_ci goto done; 161762306a36Sopenharmony_ci } 161862306a36Sopenharmony_ci vhost_net_stop(n, &tx_sock, &rx_sock); 161962306a36Sopenharmony_ci vhost_net_flush(n); 162062306a36Sopenharmony_ci vhost_dev_stop(&n->dev); 162162306a36Sopenharmony_ci vhost_dev_reset_owner(&n->dev, umem); 162262306a36Sopenharmony_ci vhost_net_vq_reset(n); 162362306a36Sopenharmony_cidone: 162462306a36Sopenharmony_ci mutex_unlock(&n->dev.mutex); 162562306a36Sopenharmony_ci if (tx_sock) 162662306a36Sopenharmony_ci sockfd_put(tx_sock); 162762306a36Sopenharmony_ci if (rx_sock) 162862306a36Sopenharmony_ci sockfd_put(rx_sock); 162962306a36Sopenharmony_ci return err; 163062306a36Sopenharmony_ci} 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_cistatic int vhost_net_set_features(struct vhost_net *n, u64 features) 163362306a36Sopenharmony_ci{ 163462306a36Sopenharmony_ci size_t vhost_hlen, sock_hlen, hdr_len; 163562306a36Sopenharmony_ci int i; 163662306a36Sopenharmony_ci 163762306a36Sopenharmony_ci hdr_len = (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | 163862306a36Sopenharmony_ci (1ULL << VIRTIO_F_VERSION_1))) ? 163962306a36Sopenharmony_ci sizeof(struct virtio_net_hdr_mrg_rxbuf) : 164062306a36Sopenharmony_ci sizeof(struct virtio_net_hdr); 164162306a36Sopenharmony_ci if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) { 164262306a36Sopenharmony_ci /* vhost provides vnet_hdr */ 164362306a36Sopenharmony_ci vhost_hlen = hdr_len; 164462306a36Sopenharmony_ci sock_hlen = 0; 164562306a36Sopenharmony_ci } else { 164662306a36Sopenharmony_ci /* socket provides vnet_hdr */ 164762306a36Sopenharmony_ci vhost_hlen = 0; 164862306a36Sopenharmony_ci sock_hlen = hdr_len; 164962306a36Sopenharmony_ci } 165062306a36Sopenharmony_ci mutex_lock(&n->dev.mutex); 165162306a36Sopenharmony_ci if ((features & (1 << VHOST_F_LOG_ALL)) && 165262306a36Sopenharmony_ci !vhost_log_access_ok(&n->dev)) 165362306a36Sopenharmony_ci goto out_unlock; 165462306a36Sopenharmony_ci 165562306a36Sopenharmony_ci if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { 165662306a36Sopenharmony_ci if (vhost_init_device_iotlb(&n->dev)) 165762306a36Sopenharmony_ci goto out_unlock; 165862306a36Sopenharmony_ci } 165962306a36Sopenharmony_ci 166062306a36Sopenharmony_ci for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { 166162306a36Sopenharmony_ci mutex_lock(&n->vqs[i].vq.mutex); 166262306a36Sopenharmony_ci n->vqs[i].vq.acked_features = features; 166362306a36Sopenharmony_ci n->vqs[i].vhost_hlen = vhost_hlen; 166462306a36Sopenharmony_ci n->vqs[i].sock_hlen = sock_hlen; 166562306a36Sopenharmony_ci mutex_unlock(&n->vqs[i].vq.mutex); 166662306a36Sopenharmony_ci } 166762306a36Sopenharmony_ci mutex_unlock(&n->dev.mutex); 166862306a36Sopenharmony_ci return 0; 166962306a36Sopenharmony_ci 167062306a36Sopenharmony_ciout_unlock: 167162306a36Sopenharmony_ci mutex_unlock(&n->dev.mutex); 167262306a36Sopenharmony_ci return -EFAULT; 167362306a36Sopenharmony_ci} 167462306a36Sopenharmony_ci 167562306a36Sopenharmony_cistatic long vhost_net_set_owner(struct vhost_net *n) 167662306a36Sopenharmony_ci{ 167762306a36Sopenharmony_ci int r; 167862306a36Sopenharmony_ci 167962306a36Sopenharmony_ci mutex_lock(&n->dev.mutex); 168062306a36Sopenharmony_ci if (vhost_dev_has_owner(&n->dev)) { 168162306a36Sopenharmony_ci r = -EBUSY; 168262306a36Sopenharmony_ci goto out; 168362306a36Sopenharmony_ci } 168462306a36Sopenharmony_ci r = vhost_net_set_ubuf_info(n); 168562306a36Sopenharmony_ci if (r) 168662306a36Sopenharmony_ci goto out; 168762306a36Sopenharmony_ci r = vhost_dev_set_owner(&n->dev); 168862306a36Sopenharmony_ci if (r) 168962306a36Sopenharmony_ci vhost_net_clear_ubuf_info(n); 169062306a36Sopenharmony_ci vhost_net_flush(n); 169162306a36Sopenharmony_ciout: 169262306a36Sopenharmony_ci mutex_unlock(&n->dev.mutex); 169362306a36Sopenharmony_ci return r; 169462306a36Sopenharmony_ci} 169562306a36Sopenharmony_ci 169662306a36Sopenharmony_cistatic long vhost_net_ioctl(struct file *f, unsigned int ioctl, 169762306a36Sopenharmony_ci unsigned long arg) 169862306a36Sopenharmony_ci{ 169962306a36Sopenharmony_ci struct vhost_net *n = f->private_data; 170062306a36Sopenharmony_ci void __user *argp = (void __user *)arg; 170162306a36Sopenharmony_ci u64 __user *featurep = argp; 170262306a36Sopenharmony_ci struct vhost_vring_file backend; 170362306a36Sopenharmony_ci u64 features; 170462306a36Sopenharmony_ci int r; 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci switch (ioctl) { 170762306a36Sopenharmony_ci case VHOST_NET_SET_BACKEND: 170862306a36Sopenharmony_ci if (copy_from_user(&backend, argp, sizeof backend)) 170962306a36Sopenharmony_ci return -EFAULT; 171062306a36Sopenharmony_ci return vhost_net_set_backend(n, backend.index, backend.fd); 171162306a36Sopenharmony_ci case VHOST_GET_FEATURES: 171262306a36Sopenharmony_ci features = VHOST_NET_FEATURES; 171362306a36Sopenharmony_ci if (copy_to_user(featurep, &features, sizeof features)) 171462306a36Sopenharmony_ci return -EFAULT; 171562306a36Sopenharmony_ci return 0; 171662306a36Sopenharmony_ci case VHOST_SET_FEATURES: 171762306a36Sopenharmony_ci if (copy_from_user(&features, featurep, sizeof features)) 171862306a36Sopenharmony_ci return -EFAULT; 171962306a36Sopenharmony_ci if (features & ~VHOST_NET_FEATURES) 172062306a36Sopenharmony_ci return -EOPNOTSUPP; 172162306a36Sopenharmony_ci return vhost_net_set_features(n, features); 172262306a36Sopenharmony_ci case VHOST_GET_BACKEND_FEATURES: 172362306a36Sopenharmony_ci features = VHOST_NET_BACKEND_FEATURES; 172462306a36Sopenharmony_ci if (copy_to_user(featurep, &features, sizeof(features))) 172562306a36Sopenharmony_ci return -EFAULT; 172662306a36Sopenharmony_ci return 0; 172762306a36Sopenharmony_ci case VHOST_SET_BACKEND_FEATURES: 172862306a36Sopenharmony_ci if (copy_from_user(&features, featurep, sizeof(features))) 172962306a36Sopenharmony_ci return -EFAULT; 173062306a36Sopenharmony_ci if (features & ~VHOST_NET_BACKEND_FEATURES) 173162306a36Sopenharmony_ci return -EOPNOTSUPP; 173262306a36Sopenharmony_ci vhost_set_backend_features(&n->dev, features); 173362306a36Sopenharmony_ci return 0; 173462306a36Sopenharmony_ci case VHOST_RESET_OWNER: 173562306a36Sopenharmony_ci return vhost_net_reset_owner(n); 173662306a36Sopenharmony_ci case VHOST_SET_OWNER: 173762306a36Sopenharmony_ci return vhost_net_set_owner(n); 173862306a36Sopenharmony_ci default: 173962306a36Sopenharmony_ci mutex_lock(&n->dev.mutex); 174062306a36Sopenharmony_ci r = vhost_dev_ioctl(&n->dev, ioctl, argp); 174162306a36Sopenharmony_ci if (r == -ENOIOCTLCMD) 174262306a36Sopenharmony_ci r = vhost_vring_ioctl(&n->dev, ioctl, argp); 174362306a36Sopenharmony_ci else 174462306a36Sopenharmony_ci vhost_net_flush(n); 174562306a36Sopenharmony_ci mutex_unlock(&n->dev.mutex); 174662306a36Sopenharmony_ci return r; 174762306a36Sopenharmony_ci } 174862306a36Sopenharmony_ci} 174962306a36Sopenharmony_ci 175062306a36Sopenharmony_cistatic ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) 175162306a36Sopenharmony_ci{ 175262306a36Sopenharmony_ci struct file *file = iocb->ki_filp; 175362306a36Sopenharmony_ci struct vhost_net *n = file->private_data; 175462306a36Sopenharmony_ci struct vhost_dev *dev = &n->dev; 175562306a36Sopenharmony_ci int noblock = file->f_flags & O_NONBLOCK; 175662306a36Sopenharmony_ci 175762306a36Sopenharmony_ci return vhost_chr_read_iter(dev, to, noblock); 175862306a36Sopenharmony_ci} 175962306a36Sopenharmony_ci 176062306a36Sopenharmony_cistatic ssize_t vhost_net_chr_write_iter(struct kiocb *iocb, 176162306a36Sopenharmony_ci struct iov_iter *from) 176262306a36Sopenharmony_ci{ 176362306a36Sopenharmony_ci struct file *file = iocb->ki_filp; 176462306a36Sopenharmony_ci struct vhost_net *n = file->private_data; 176562306a36Sopenharmony_ci struct vhost_dev *dev = &n->dev; 176662306a36Sopenharmony_ci 176762306a36Sopenharmony_ci return vhost_chr_write_iter(dev, from); 176862306a36Sopenharmony_ci} 176962306a36Sopenharmony_ci 177062306a36Sopenharmony_cistatic __poll_t vhost_net_chr_poll(struct file *file, poll_table *wait) 177162306a36Sopenharmony_ci{ 177262306a36Sopenharmony_ci struct vhost_net *n = file->private_data; 177362306a36Sopenharmony_ci struct vhost_dev *dev = &n->dev; 177462306a36Sopenharmony_ci 177562306a36Sopenharmony_ci return vhost_chr_poll(file, dev, wait); 177662306a36Sopenharmony_ci} 177762306a36Sopenharmony_ci 177862306a36Sopenharmony_cistatic const struct file_operations vhost_net_fops = { 177962306a36Sopenharmony_ci .owner = THIS_MODULE, 178062306a36Sopenharmony_ci .release = vhost_net_release, 178162306a36Sopenharmony_ci .read_iter = vhost_net_chr_read_iter, 178262306a36Sopenharmony_ci .write_iter = vhost_net_chr_write_iter, 178362306a36Sopenharmony_ci .poll = vhost_net_chr_poll, 178462306a36Sopenharmony_ci .unlocked_ioctl = vhost_net_ioctl, 178562306a36Sopenharmony_ci .compat_ioctl = compat_ptr_ioctl, 178662306a36Sopenharmony_ci .open = vhost_net_open, 178762306a36Sopenharmony_ci .llseek = noop_llseek, 178862306a36Sopenharmony_ci}; 178962306a36Sopenharmony_ci 179062306a36Sopenharmony_cistatic struct miscdevice vhost_net_misc = { 179162306a36Sopenharmony_ci .minor = VHOST_NET_MINOR, 179262306a36Sopenharmony_ci .name = "vhost-net", 179362306a36Sopenharmony_ci .fops = &vhost_net_fops, 179462306a36Sopenharmony_ci}; 179562306a36Sopenharmony_ci 179662306a36Sopenharmony_cistatic int __init vhost_net_init(void) 179762306a36Sopenharmony_ci{ 179862306a36Sopenharmony_ci if (experimental_zcopytx) 179962306a36Sopenharmony_ci vhost_net_enable_zcopy(VHOST_NET_VQ_TX); 180062306a36Sopenharmony_ci return misc_register(&vhost_net_misc); 180162306a36Sopenharmony_ci} 180262306a36Sopenharmony_cimodule_init(vhost_net_init); 180362306a36Sopenharmony_ci 180462306a36Sopenharmony_cistatic void __exit vhost_net_exit(void) 180562306a36Sopenharmony_ci{ 180662306a36Sopenharmony_ci misc_deregister(&vhost_net_misc); 180762306a36Sopenharmony_ci} 180862306a36Sopenharmony_cimodule_exit(vhost_net_exit); 180962306a36Sopenharmony_ci 181062306a36Sopenharmony_ciMODULE_VERSION("0.0.1"); 181162306a36Sopenharmony_ciMODULE_LICENSE("GPL v2"); 181262306a36Sopenharmony_ciMODULE_AUTHOR("Michael S. Tsirkin"); 181362306a36Sopenharmony_ciMODULE_DESCRIPTION("Host kernel accelerator for virtio net"); 181462306a36Sopenharmony_ciMODULE_ALIAS_MISCDEV(VHOST_NET_MINOR); 181562306a36Sopenharmony_ciMODULE_ALIAS("devname:vhost-net"); 1816