162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * vhost transport for vsock
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2013-2015 Red Hat, Inc.
662306a36Sopenharmony_ci * Author: Asias He <asias@redhat.com>
762306a36Sopenharmony_ci *         Stefan Hajnoczi <stefanha@redhat.com>
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci#include <linux/miscdevice.h>
1062306a36Sopenharmony_ci#include <linux/atomic.h>
1162306a36Sopenharmony_ci#include <linux/module.h>
1262306a36Sopenharmony_ci#include <linux/mutex.h>
1362306a36Sopenharmony_ci#include <linux/vmalloc.h>
1462306a36Sopenharmony_ci#include <net/sock.h>
1562306a36Sopenharmony_ci#include <linux/virtio_vsock.h>
1662306a36Sopenharmony_ci#include <linux/vhost.h>
1762306a36Sopenharmony_ci#include <linux/hashtable.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include <net/af_vsock.h>
2062306a36Sopenharmony_ci#include "vhost.h"
2162306a36Sopenharmony_ci
/* CID returned by vhost_transport_get_local_cid() for the host side. */
#define VHOST_VSOCK_DEFAULT_HOST_CID	2

/* Byte budget for one pass over a virtqueue.  Once this many bytes have been
 * transferred the job is requeued so that one virtqueue cannot starve the
 * others.
 */
#define VHOST_VSOCK_WEIGHT 0x80000

/* Packet budget for one pass over a virtqueue.  Complements the byte budget
 * above so that a flood of small packets cannot starve other virtqueues
 * either.
 */
#define VHOST_VSOCK_PKT_WEIGHT 256
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_cienum {
3362306a36Sopenharmony_ci	VHOST_VSOCK_FEATURES = VHOST_FEATURES |
3462306a36Sopenharmony_ci			       (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
3562306a36Sopenharmony_ci			       (1ULL << VIRTIO_VSOCK_F_SEQPACKET)
3662306a36Sopenharmony_ci};
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_cienum {
3962306a36Sopenharmony_ci	VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
4062306a36Sopenharmony_ci};
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci/* Used to track all the vhost_vsock instances on the system. */
4362306a36Sopenharmony_cistatic DEFINE_MUTEX(vhost_vsock_mutex);
4462306a36Sopenharmony_cistatic DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_cistruct vhost_vsock {
4762306a36Sopenharmony_ci	struct vhost_dev dev;
4862306a36Sopenharmony_ci	struct vhost_virtqueue vqs[2];
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci	/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
5162306a36Sopenharmony_ci	struct hlist_node hash;
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci	struct vhost_work send_pkt_work;
5462306a36Sopenharmony_ci	struct sk_buff_head send_pkt_queue; /* host->guest pending packets */
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	atomic_t queued_replies;
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci	u32 guest_cid;
5962306a36Sopenharmony_ci	bool seqpacket_allow;
6062306a36Sopenharmony_ci};
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_cistatic u32 vhost_transport_get_local_cid(void)
6362306a36Sopenharmony_ci{
6462306a36Sopenharmony_ci	return VHOST_VSOCK_DEFAULT_HOST_CID;
6562306a36Sopenharmony_ci}
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci/* Callers that dereference the return value must hold vhost_vsock_mutex or the
6862306a36Sopenharmony_ci * RCU read lock.
6962306a36Sopenharmony_ci */
7062306a36Sopenharmony_cistatic struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
7162306a36Sopenharmony_ci{
7262306a36Sopenharmony_ci	struct vhost_vsock *vsock;
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
7562306a36Sopenharmony_ci		u32 other_cid = vsock->guest_cid;
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci		/* Skip instances that have no CID yet */
7862306a36Sopenharmony_ci		if (other_cid == 0)
7962306a36Sopenharmony_ci			continue;
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci		if (other_cid == guest_cid)
8262306a36Sopenharmony_ci			return vsock;
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	}
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci	return NULL;
8762306a36Sopenharmony_ci}
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_cistatic void
9062306a36Sopenharmony_civhost_transport_do_send_pkt(struct vhost_vsock *vsock,
9162306a36Sopenharmony_ci			    struct vhost_virtqueue *vq)
9262306a36Sopenharmony_ci{
9362306a36Sopenharmony_ci	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
9462306a36Sopenharmony_ci	int pkts = 0, total_len = 0;
9562306a36Sopenharmony_ci	bool added = false;
9662306a36Sopenharmony_ci	bool restart_tx = false;
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	mutex_lock(&vq->mutex);
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	if (!vhost_vq_get_backend(vq))
10162306a36Sopenharmony_ci		goto out;
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	if (!vq_meta_prefetch(vq))
10462306a36Sopenharmony_ci		goto out;
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci	/* Avoid further vmexits, we're already processing the virtqueue */
10762306a36Sopenharmony_ci	vhost_disable_notify(&vsock->dev, vq);
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci	do {
11062306a36Sopenharmony_ci		struct virtio_vsock_hdr *hdr;
11162306a36Sopenharmony_ci		size_t iov_len, payload_len;
11262306a36Sopenharmony_ci		struct iov_iter iov_iter;
11362306a36Sopenharmony_ci		u32 flags_to_restore = 0;
11462306a36Sopenharmony_ci		struct sk_buff *skb;
11562306a36Sopenharmony_ci		unsigned out, in;
11662306a36Sopenharmony_ci		size_t nbytes;
11762306a36Sopenharmony_ci		int head;
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci		skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci		if (!skb) {
12262306a36Sopenharmony_ci			vhost_enable_notify(&vsock->dev, vq);
12362306a36Sopenharmony_ci			break;
12462306a36Sopenharmony_ci		}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
12762306a36Sopenharmony_ci					 &out, &in, NULL, NULL);
12862306a36Sopenharmony_ci		if (head < 0) {
12962306a36Sopenharmony_ci			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
13062306a36Sopenharmony_ci			break;
13162306a36Sopenharmony_ci		}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci		if (head == vq->num) {
13462306a36Sopenharmony_ci			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
13562306a36Sopenharmony_ci			/* We cannot finish yet if more buffers snuck in while
13662306a36Sopenharmony_ci			 * re-enabling notify.
13762306a36Sopenharmony_ci			 */
13862306a36Sopenharmony_ci			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
13962306a36Sopenharmony_ci				vhost_disable_notify(&vsock->dev, vq);
14062306a36Sopenharmony_ci				continue;
14162306a36Sopenharmony_ci			}
14262306a36Sopenharmony_ci			break;
14362306a36Sopenharmony_ci		}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci		if (out) {
14662306a36Sopenharmony_ci			kfree_skb(skb);
14762306a36Sopenharmony_ci			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
14862306a36Sopenharmony_ci			break;
14962306a36Sopenharmony_ci		}
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci		iov_len = iov_length(&vq->iov[out], in);
15262306a36Sopenharmony_ci		if (iov_len < sizeof(*hdr)) {
15362306a36Sopenharmony_ci			kfree_skb(skb);
15462306a36Sopenharmony_ci			vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
15562306a36Sopenharmony_ci			break;
15662306a36Sopenharmony_ci		}
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci		iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[out], in, iov_len);
15962306a36Sopenharmony_ci		payload_len = skb->len;
16062306a36Sopenharmony_ci		hdr = virtio_vsock_hdr(skb);
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci		/* If the packet is greater than the space available in the
16362306a36Sopenharmony_ci		 * buffer, we split it using multiple buffers.
16462306a36Sopenharmony_ci		 */
16562306a36Sopenharmony_ci		if (payload_len > iov_len - sizeof(*hdr)) {
16662306a36Sopenharmony_ci			payload_len = iov_len - sizeof(*hdr);
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci			/* As we are copying pieces of large packet's buffer to
16962306a36Sopenharmony_ci			 * small rx buffers, headers of packets in rx queue are
17062306a36Sopenharmony_ci			 * created dynamically and are initialized with header
17162306a36Sopenharmony_ci			 * of current packet(except length). But in case of
17262306a36Sopenharmony_ci			 * SOCK_SEQPACKET, we also must clear message delimeter
17362306a36Sopenharmony_ci			 * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit
17462306a36Sopenharmony_ci			 * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise,
17562306a36Sopenharmony_ci			 * there will be sequence of packets with these
17662306a36Sopenharmony_ci			 * bits set. After initialized header will be copied to
17762306a36Sopenharmony_ci			 * rx buffer, these required bits will be restored.
17862306a36Sopenharmony_ci			 */
17962306a36Sopenharmony_ci			if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
18062306a36Sopenharmony_ci				hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
18162306a36Sopenharmony_ci				flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci				if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR) {
18462306a36Sopenharmony_ci					hdr->flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
18562306a36Sopenharmony_ci					flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR;
18662306a36Sopenharmony_ci				}
18762306a36Sopenharmony_ci			}
18862306a36Sopenharmony_ci		}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci		/* Set the correct length in the header */
19162306a36Sopenharmony_ci		hdr->len = cpu_to_le32(payload_len);
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci		nbytes = copy_to_iter(hdr, sizeof(*hdr), &iov_iter);
19462306a36Sopenharmony_ci		if (nbytes != sizeof(*hdr)) {
19562306a36Sopenharmony_ci			kfree_skb(skb);
19662306a36Sopenharmony_ci			vq_err(vq, "Faulted on copying pkt hdr\n");
19762306a36Sopenharmony_ci			break;
19862306a36Sopenharmony_ci		}
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci		nbytes = copy_to_iter(skb->data, payload_len, &iov_iter);
20162306a36Sopenharmony_ci		if (nbytes != payload_len) {
20262306a36Sopenharmony_ci			kfree_skb(skb);
20362306a36Sopenharmony_ci			vq_err(vq, "Faulted on copying pkt buf\n");
20462306a36Sopenharmony_ci			break;
20562306a36Sopenharmony_ci		}
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci		/* Deliver to monitoring devices all packets that we
20862306a36Sopenharmony_ci		 * will transmit.
20962306a36Sopenharmony_ci		 */
21062306a36Sopenharmony_ci		virtio_transport_deliver_tap_pkt(skb);
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci		vhost_add_used(vq, head, sizeof(*hdr) + payload_len);
21362306a36Sopenharmony_ci		added = true;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci		skb_pull(skb, payload_len);
21662306a36Sopenharmony_ci		total_len += payload_len;
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci		/* If we didn't send all the payload we can requeue the packet
21962306a36Sopenharmony_ci		 * to send it with the next available buffer.
22062306a36Sopenharmony_ci		 */
22162306a36Sopenharmony_ci		if (skb->len > 0) {
22262306a36Sopenharmony_ci			hdr->flags |= cpu_to_le32(flags_to_restore);
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci			/* We are queueing the same skb to handle
22562306a36Sopenharmony_ci			 * the remaining bytes, and we want to deliver it
22662306a36Sopenharmony_ci			 * to monitoring devices in the next iteration.
22762306a36Sopenharmony_ci			 */
22862306a36Sopenharmony_ci			virtio_vsock_skb_clear_tap_delivered(skb);
22962306a36Sopenharmony_ci			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
23062306a36Sopenharmony_ci		} else {
23162306a36Sopenharmony_ci			if (virtio_vsock_skb_reply(skb)) {
23262306a36Sopenharmony_ci				int val;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci				val = atomic_dec_return(&vsock->queued_replies);
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci				/* Do we have resources to resume tx
23762306a36Sopenharmony_ci				 * processing?
23862306a36Sopenharmony_ci				 */
23962306a36Sopenharmony_ci				if (val + 1 == tx_vq->num)
24062306a36Sopenharmony_ci					restart_tx = true;
24162306a36Sopenharmony_ci			}
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci			consume_skb(skb);
24462306a36Sopenharmony_ci		}
24562306a36Sopenharmony_ci	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
24662306a36Sopenharmony_ci	if (added)
24762306a36Sopenharmony_ci		vhost_signal(&vsock->dev, vq);
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ciout:
25062306a36Sopenharmony_ci	mutex_unlock(&vq->mutex);
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	if (restart_tx)
25362306a36Sopenharmony_ci		vhost_poll_queue(&tx_vq->poll);
25462306a36Sopenharmony_ci}
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_cistatic void vhost_transport_send_pkt_work(struct vhost_work *work)
25762306a36Sopenharmony_ci{
25862306a36Sopenharmony_ci	struct vhost_virtqueue *vq;
25962306a36Sopenharmony_ci	struct vhost_vsock *vsock;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	vsock = container_of(work, struct vhost_vsock, send_pkt_work);
26262306a36Sopenharmony_ci	vq = &vsock->vqs[VSOCK_VQ_RX];
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	vhost_transport_do_send_pkt(vsock, vq);
26562306a36Sopenharmony_ci}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_cistatic int
26862306a36Sopenharmony_civhost_transport_send_pkt(struct sk_buff *skb)
26962306a36Sopenharmony_ci{
27062306a36Sopenharmony_ci	struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
27162306a36Sopenharmony_ci	struct vhost_vsock *vsock;
27262306a36Sopenharmony_ci	int len = skb->len;
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	rcu_read_lock();
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci	/* Find the vhost_vsock according to guest context id  */
27762306a36Sopenharmony_ci	vsock = vhost_vsock_get(le64_to_cpu(hdr->dst_cid));
27862306a36Sopenharmony_ci	if (!vsock) {
27962306a36Sopenharmony_ci		rcu_read_unlock();
28062306a36Sopenharmony_ci		kfree_skb(skb);
28162306a36Sopenharmony_ci		return -ENODEV;
28262306a36Sopenharmony_ci	}
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci	if (virtio_vsock_skb_reply(skb))
28562306a36Sopenharmony_ci		atomic_inc(&vsock->queued_replies);
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci	virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
28862306a36Sopenharmony_ci	vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	rcu_read_unlock();
29162306a36Sopenharmony_ci	return len;
29262306a36Sopenharmony_ci}
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_cistatic int
29562306a36Sopenharmony_civhost_transport_cancel_pkt(struct vsock_sock *vsk)
29662306a36Sopenharmony_ci{
29762306a36Sopenharmony_ci	struct vhost_vsock *vsock;
29862306a36Sopenharmony_ci	int cnt = 0;
29962306a36Sopenharmony_ci	int ret = -ENODEV;
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	rcu_read_lock();
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci	/* Find the vhost_vsock according to guest context id  */
30462306a36Sopenharmony_ci	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
30562306a36Sopenharmony_ci	if (!vsock)
30662306a36Sopenharmony_ci		goto out;
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue);
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	if (cnt) {
31162306a36Sopenharmony_ci		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
31262306a36Sopenharmony_ci		int new_cnt;
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
31562306a36Sopenharmony_ci		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
31662306a36Sopenharmony_ci			vhost_poll_queue(&tx_vq->poll);
31762306a36Sopenharmony_ci	}
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	ret = 0;
32062306a36Sopenharmony_ciout:
32162306a36Sopenharmony_ci	rcu_read_unlock();
32262306a36Sopenharmony_ci	return ret;
32362306a36Sopenharmony_ci}
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_cistatic struct sk_buff *
32662306a36Sopenharmony_civhost_vsock_alloc_skb(struct vhost_virtqueue *vq,
32762306a36Sopenharmony_ci		      unsigned int out, unsigned int in)
32862306a36Sopenharmony_ci{
32962306a36Sopenharmony_ci	struct virtio_vsock_hdr *hdr;
33062306a36Sopenharmony_ci	struct iov_iter iov_iter;
33162306a36Sopenharmony_ci	struct sk_buff *skb;
33262306a36Sopenharmony_ci	size_t payload_len;
33362306a36Sopenharmony_ci	size_t nbytes;
33462306a36Sopenharmony_ci	size_t len;
33562306a36Sopenharmony_ci
33662306a36Sopenharmony_ci	if (in != 0) {
33762306a36Sopenharmony_ci		vq_err(vq, "Expected 0 input buffers, got %u\n", in);
33862306a36Sopenharmony_ci		return NULL;
33962306a36Sopenharmony_ci	}
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci	len = iov_length(vq->iov, out);
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	/* len contains both payload and hdr */
34462306a36Sopenharmony_ci	skb = virtio_vsock_alloc_skb(len, GFP_KERNEL);
34562306a36Sopenharmony_ci	if (!skb)
34662306a36Sopenharmony_ci		return NULL;
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ci	iov_iter_init(&iov_iter, ITER_SOURCE, vq->iov, out, len);
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci	hdr = virtio_vsock_hdr(skb);
35162306a36Sopenharmony_ci	nbytes = copy_from_iter(hdr, sizeof(*hdr), &iov_iter);
35262306a36Sopenharmony_ci	if (nbytes != sizeof(*hdr)) {
35362306a36Sopenharmony_ci		vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
35462306a36Sopenharmony_ci		       sizeof(*hdr), nbytes);
35562306a36Sopenharmony_ci		kfree_skb(skb);
35662306a36Sopenharmony_ci		return NULL;
35762306a36Sopenharmony_ci	}
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci	payload_len = le32_to_cpu(hdr->len);
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	/* No payload */
36262306a36Sopenharmony_ci	if (!payload_len)
36362306a36Sopenharmony_ci		return skb;
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci	/* The pkt is too big or the length in the header is invalid */
36662306a36Sopenharmony_ci	if (payload_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE ||
36762306a36Sopenharmony_ci	    payload_len + sizeof(*hdr) > len) {
36862306a36Sopenharmony_ci		kfree_skb(skb);
36962306a36Sopenharmony_ci		return NULL;
37062306a36Sopenharmony_ci	}
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	virtio_vsock_skb_rx_put(skb);
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci	nbytes = copy_from_iter(skb->data, payload_len, &iov_iter);
37562306a36Sopenharmony_ci	if (nbytes != payload_len) {
37662306a36Sopenharmony_ci		vq_err(vq, "Expected %zu byte payload, got %zu bytes\n",
37762306a36Sopenharmony_ci		       payload_len, nbytes);
37862306a36Sopenharmony_ci		kfree_skb(skb);
37962306a36Sopenharmony_ci		return NULL;
38062306a36Sopenharmony_ci	}
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci	return skb;
38362306a36Sopenharmony_ci}
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci/* Is there space left for replies to rx packets? */
38662306a36Sopenharmony_cistatic bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
38762306a36Sopenharmony_ci{
38862306a36Sopenharmony_ci	struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
38962306a36Sopenharmony_ci	int val;
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci	smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
39262306a36Sopenharmony_ci	val = atomic_read(&vsock->queued_replies);
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	return val < vq->num;
39562306a36Sopenharmony_ci}
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_cistatic bool vhost_transport_seqpacket_allow(u32 remote_cid);
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_cistatic struct virtio_transport vhost_transport = {
40062306a36Sopenharmony_ci	.transport = {
40162306a36Sopenharmony_ci		.module                   = THIS_MODULE,
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci		.get_local_cid            = vhost_transport_get_local_cid,
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci		.init                     = virtio_transport_do_socket_init,
40662306a36Sopenharmony_ci		.destruct                 = virtio_transport_destruct,
40762306a36Sopenharmony_ci		.release                  = virtio_transport_release,
40862306a36Sopenharmony_ci		.connect                  = virtio_transport_connect,
40962306a36Sopenharmony_ci		.shutdown                 = virtio_transport_shutdown,
41062306a36Sopenharmony_ci		.cancel_pkt               = vhost_transport_cancel_pkt,
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci		.dgram_enqueue            = virtio_transport_dgram_enqueue,
41362306a36Sopenharmony_ci		.dgram_dequeue            = virtio_transport_dgram_dequeue,
41462306a36Sopenharmony_ci		.dgram_bind               = virtio_transport_dgram_bind,
41562306a36Sopenharmony_ci		.dgram_allow              = virtio_transport_dgram_allow,
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci		.stream_enqueue           = virtio_transport_stream_enqueue,
41862306a36Sopenharmony_ci		.stream_dequeue           = virtio_transport_stream_dequeue,
41962306a36Sopenharmony_ci		.stream_has_data          = virtio_transport_stream_has_data,
42062306a36Sopenharmony_ci		.stream_has_space         = virtio_transport_stream_has_space,
42162306a36Sopenharmony_ci		.stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
42262306a36Sopenharmony_ci		.stream_is_active         = virtio_transport_stream_is_active,
42362306a36Sopenharmony_ci		.stream_allow             = virtio_transport_stream_allow,
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci		.seqpacket_dequeue        = virtio_transport_seqpacket_dequeue,
42662306a36Sopenharmony_ci		.seqpacket_enqueue        = virtio_transport_seqpacket_enqueue,
42762306a36Sopenharmony_ci		.seqpacket_allow          = vhost_transport_seqpacket_allow,
42862306a36Sopenharmony_ci		.seqpacket_has_data       = virtio_transport_seqpacket_has_data,
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci		.notify_poll_in           = virtio_transport_notify_poll_in,
43162306a36Sopenharmony_ci		.notify_poll_out          = virtio_transport_notify_poll_out,
43262306a36Sopenharmony_ci		.notify_recv_init         = virtio_transport_notify_recv_init,
43362306a36Sopenharmony_ci		.notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
43462306a36Sopenharmony_ci		.notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
43562306a36Sopenharmony_ci		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
43662306a36Sopenharmony_ci		.notify_send_init         = virtio_transport_notify_send_init,
43762306a36Sopenharmony_ci		.notify_send_pre_block    = virtio_transport_notify_send_pre_block,
43862306a36Sopenharmony_ci		.notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
43962306a36Sopenharmony_ci		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
44062306a36Sopenharmony_ci		.notify_buffer_size       = virtio_transport_notify_buffer_size,
44162306a36Sopenharmony_ci		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci		.read_skb = virtio_transport_read_skb,
44462306a36Sopenharmony_ci	},
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	.send_pkt = vhost_transport_send_pkt,
44762306a36Sopenharmony_ci};
44862306a36Sopenharmony_ci
44962306a36Sopenharmony_cistatic bool vhost_transport_seqpacket_allow(u32 remote_cid)
45062306a36Sopenharmony_ci{
45162306a36Sopenharmony_ci	struct vhost_vsock *vsock;
45262306a36Sopenharmony_ci	bool seqpacket_allow = false;
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ci	rcu_read_lock();
45562306a36Sopenharmony_ci	vsock = vhost_vsock_get(remote_cid);
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci	if (vsock)
45862306a36Sopenharmony_ci		seqpacket_allow = vsock->seqpacket_allow;
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	rcu_read_unlock();
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci	return seqpacket_allow;
46362306a36Sopenharmony_ci}
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_cistatic void vhost_vsock_handle_tx_kick(struct vhost_work *work)
46662306a36Sopenharmony_ci{
46762306a36Sopenharmony_ci	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
46862306a36Sopenharmony_ci						  poll.work);
46962306a36Sopenharmony_ci	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
47062306a36Sopenharmony_ci						 dev);
47162306a36Sopenharmony_ci	int head, pkts = 0, total_len = 0;
47262306a36Sopenharmony_ci	unsigned int out, in;
47362306a36Sopenharmony_ci	struct sk_buff *skb;
47462306a36Sopenharmony_ci	bool added = false;
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci	mutex_lock(&vq->mutex);
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	if (!vhost_vq_get_backend(vq))
47962306a36Sopenharmony_ci		goto out;
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	if (!vq_meta_prefetch(vq))
48262306a36Sopenharmony_ci		goto out;
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci	vhost_disable_notify(&vsock->dev, vq);
48562306a36Sopenharmony_ci	do {
48662306a36Sopenharmony_ci		struct virtio_vsock_hdr *hdr;
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci		if (!vhost_vsock_more_replies(vsock)) {
48962306a36Sopenharmony_ci			/* Stop tx until the device processes already
49062306a36Sopenharmony_ci			 * pending replies.  Leave tx virtqueue
49162306a36Sopenharmony_ci			 * callbacks disabled.
49262306a36Sopenharmony_ci			 */
49362306a36Sopenharmony_ci			goto no_more_replies;
49462306a36Sopenharmony_ci		}
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
49762306a36Sopenharmony_ci					 &out, &in, NULL, NULL);
49862306a36Sopenharmony_ci		if (head < 0)
49962306a36Sopenharmony_ci			break;
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci		if (head == vq->num) {
50262306a36Sopenharmony_ci			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
50362306a36Sopenharmony_ci				vhost_disable_notify(&vsock->dev, vq);
50462306a36Sopenharmony_ci				continue;
50562306a36Sopenharmony_ci			}
50662306a36Sopenharmony_ci			break;
50762306a36Sopenharmony_ci		}
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci		skb = vhost_vsock_alloc_skb(vq, out, in);
51062306a36Sopenharmony_ci		if (!skb) {
51162306a36Sopenharmony_ci			vq_err(vq, "Faulted on pkt\n");
51262306a36Sopenharmony_ci			continue;
51362306a36Sopenharmony_ci		}
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci		total_len += sizeof(*hdr) + skb->len;
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci		/* Deliver to monitoring devices all received packets */
51862306a36Sopenharmony_ci		virtio_transport_deliver_tap_pkt(skb);
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci		hdr = virtio_vsock_hdr(skb);
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci		/* Only accept correctly addressed packets */
52362306a36Sopenharmony_ci		if (le64_to_cpu(hdr->src_cid) == vsock->guest_cid &&
52462306a36Sopenharmony_ci		    le64_to_cpu(hdr->dst_cid) ==
52562306a36Sopenharmony_ci		    vhost_transport_get_local_cid())
52662306a36Sopenharmony_ci			virtio_transport_recv_pkt(&vhost_transport, skb);
52762306a36Sopenharmony_ci		else
52862306a36Sopenharmony_ci			kfree_skb(skb);
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_ci		vhost_add_used(vq, head, 0);
53162306a36Sopenharmony_ci		added = true;
53262306a36Sopenharmony_ci	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_cino_more_replies:
53562306a36Sopenharmony_ci	if (added)
53662306a36Sopenharmony_ci		vhost_signal(&vsock->dev, vq);
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ciout:
53962306a36Sopenharmony_ci	mutex_unlock(&vq->mutex);
54062306a36Sopenharmony_ci}
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_cistatic void vhost_vsock_handle_rx_kick(struct vhost_work *work)
54362306a36Sopenharmony_ci{
54462306a36Sopenharmony_ci	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
54562306a36Sopenharmony_ci						poll.work);
54662306a36Sopenharmony_ci	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
54762306a36Sopenharmony_ci						 dev);
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci	vhost_transport_do_send_pkt(vsock, vq);
55062306a36Sopenharmony_ci}
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_cistatic int vhost_vsock_start(struct vhost_vsock *vsock)
55362306a36Sopenharmony_ci{
55462306a36Sopenharmony_ci	struct vhost_virtqueue *vq;
55562306a36Sopenharmony_ci	size_t i;
55662306a36Sopenharmony_ci	int ret;
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci	mutex_lock(&vsock->dev.mutex);
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	ret = vhost_dev_check_owner(&vsock->dev);
56162306a36Sopenharmony_ci	if (ret)
56262306a36Sopenharmony_ci		goto err;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
56562306a36Sopenharmony_ci		vq = &vsock->vqs[i];
56662306a36Sopenharmony_ci
56762306a36Sopenharmony_ci		mutex_lock(&vq->mutex);
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci		if (!vhost_vq_access_ok(vq)) {
57062306a36Sopenharmony_ci			ret = -EFAULT;
57162306a36Sopenharmony_ci			goto err_vq;
57262306a36Sopenharmony_ci		}
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci		if (!vhost_vq_get_backend(vq)) {
57562306a36Sopenharmony_ci			vhost_vq_set_backend(vq, vsock);
57662306a36Sopenharmony_ci			ret = vhost_vq_init_access(vq);
57762306a36Sopenharmony_ci			if (ret)
57862306a36Sopenharmony_ci				goto err_vq;
57962306a36Sopenharmony_ci		}
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci		mutex_unlock(&vq->mutex);
58262306a36Sopenharmony_ci	}
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_ci	/* Some packets may have been queued before the device was started,
58562306a36Sopenharmony_ci	 * let's kick the send worker to send them.
58662306a36Sopenharmony_ci	 */
58762306a36Sopenharmony_ci	vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	mutex_unlock(&vsock->dev.mutex);
59062306a36Sopenharmony_ci	return 0;
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_cierr_vq:
59362306a36Sopenharmony_ci	vhost_vq_set_backend(vq, NULL);
59462306a36Sopenharmony_ci	mutex_unlock(&vq->mutex);
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
59762306a36Sopenharmony_ci		vq = &vsock->vqs[i];
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci		mutex_lock(&vq->mutex);
60062306a36Sopenharmony_ci		vhost_vq_set_backend(vq, NULL);
60162306a36Sopenharmony_ci		mutex_unlock(&vq->mutex);
60262306a36Sopenharmony_ci	}
60362306a36Sopenharmony_cierr:
60462306a36Sopenharmony_ci	mutex_unlock(&vsock->dev.mutex);
60562306a36Sopenharmony_ci	return ret;
60662306a36Sopenharmony_ci}
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_cistatic int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
60962306a36Sopenharmony_ci{
61062306a36Sopenharmony_ci	size_t i;
61162306a36Sopenharmony_ci	int ret = 0;
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci	mutex_lock(&vsock->dev.mutex);
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	if (check_owner) {
61662306a36Sopenharmony_ci		ret = vhost_dev_check_owner(&vsock->dev);
61762306a36Sopenharmony_ci		if (ret)
61862306a36Sopenharmony_ci			goto err;
61962306a36Sopenharmony_ci	}
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
62262306a36Sopenharmony_ci		struct vhost_virtqueue *vq = &vsock->vqs[i];
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci		mutex_lock(&vq->mutex);
62562306a36Sopenharmony_ci		vhost_vq_set_backend(vq, NULL);
62662306a36Sopenharmony_ci		mutex_unlock(&vq->mutex);
62762306a36Sopenharmony_ci	}
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_cierr:
63062306a36Sopenharmony_ci	mutex_unlock(&vsock->dev.mutex);
63162306a36Sopenharmony_ci	return ret;
63262306a36Sopenharmony_ci}
63362306a36Sopenharmony_ci
/* Release the memory backing @vsock.
 *
 * kvfree() is used because the structure is obtained with kvmalloc() in
 * vhost_vsock_dev_open().
 */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_cistatic int vhost_vsock_dev_open(struct inode *inode, struct file *file)
64062306a36Sopenharmony_ci{
64162306a36Sopenharmony_ci	struct vhost_virtqueue **vqs;
64262306a36Sopenharmony_ci	struct vhost_vsock *vsock;
64362306a36Sopenharmony_ci	int ret;
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	/* This struct is large and allocation could fail, fall back to vmalloc
64662306a36Sopenharmony_ci	 * if there is no other way.
64762306a36Sopenharmony_ci	 */
64862306a36Sopenharmony_ci	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
64962306a36Sopenharmony_ci	if (!vsock)
65062306a36Sopenharmony_ci		return -ENOMEM;
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci	vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
65362306a36Sopenharmony_ci	if (!vqs) {
65462306a36Sopenharmony_ci		ret = -ENOMEM;
65562306a36Sopenharmony_ci		goto out;
65662306a36Sopenharmony_ci	}
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci	vsock->guest_cid = 0; /* no CID assigned yet */
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci	atomic_set(&vsock->queued_replies, 0);
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
66362306a36Sopenharmony_ci	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
66462306a36Sopenharmony_ci	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
66562306a36Sopenharmony_ci	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
66862306a36Sopenharmony_ci		       UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
66962306a36Sopenharmony_ci		       VHOST_VSOCK_WEIGHT, true, NULL);
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_ci	file->private_data = vsock;
67262306a36Sopenharmony_ci	skb_queue_head_init(&vsock->send_pkt_queue);
67362306a36Sopenharmony_ci	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
67462306a36Sopenharmony_ci	return 0;
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ciout:
67762306a36Sopenharmony_ci	vhost_vsock_free(vsock);
67862306a36Sopenharmony_ci	return ret;
67962306a36Sopenharmony_ci}
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_cistatic void vhost_vsock_flush(struct vhost_vsock *vsock)
68262306a36Sopenharmony_ci{
68362306a36Sopenharmony_ci	vhost_dev_flush(&vsock->dev);
68462306a36Sopenharmony_ci}
68562306a36Sopenharmony_ci
68662306a36Sopenharmony_cistatic void vhost_vsock_reset_orphans(struct sock *sk)
68762306a36Sopenharmony_ci{
68862306a36Sopenharmony_ci	struct vsock_sock *vsk = vsock_sk(sk);
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_ci	/* vmci_transport.c doesn't take sk_lock here either.  At least we're
69162306a36Sopenharmony_ci	 * under vsock_table_lock so the sock cannot disappear while we're
69262306a36Sopenharmony_ci	 * executing.
69362306a36Sopenharmony_ci	 */
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_ci	/* If the peer is still valid, no need to reset connection */
69662306a36Sopenharmony_ci	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
69762306a36Sopenharmony_ci		return;
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	/* If the close timeout is pending, let it expire.  This avoids races
70062306a36Sopenharmony_ci	 * with the timeout callback.
70162306a36Sopenharmony_ci	 */
70262306a36Sopenharmony_ci	if (vsk->close_work_scheduled)
70362306a36Sopenharmony_ci		return;
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	sock_set_flag(sk, SOCK_DONE);
70662306a36Sopenharmony_ci	vsk->peer_shutdown = SHUTDOWN_MASK;
70762306a36Sopenharmony_ci	sk->sk_state = SS_UNCONNECTED;
70862306a36Sopenharmony_ci	sk->sk_err = ECONNRESET;
70962306a36Sopenharmony_ci	sk_error_report(sk);
71062306a36Sopenharmony_ci}
71162306a36Sopenharmony_ci
71262306a36Sopenharmony_cistatic int vhost_vsock_dev_release(struct inode *inode, struct file *file)
71362306a36Sopenharmony_ci{
71462306a36Sopenharmony_ci	struct vhost_vsock *vsock = file->private_data;
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci	mutex_lock(&vhost_vsock_mutex);
71762306a36Sopenharmony_ci	if (vsock->guest_cid)
71862306a36Sopenharmony_ci		hash_del_rcu(&vsock->hash);
71962306a36Sopenharmony_ci	mutex_unlock(&vhost_vsock_mutex);
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	/* Wait for other CPUs to finish using vsock */
72262306a36Sopenharmony_ci	synchronize_rcu();
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	/* Iterating over all connections for all CIDs to find orphans is
72562306a36Sopenharmony_ci	 * inefficient.  Room for improvement here. */
72662306a36Sopenharmony_ci	vsock_for_each_connected_socket(&vhost_transport.transport,
72762306a36Sopenharmony_ci					vhost_vsock_reset_orphans);
72862306a36Sopenharmony_ci
72962306a36Sopenharmony_ci	/* Don't check the owner, because we are in the release path, so we
73062306a36Sopenharmony_ci	 * need to stop the vsock device in any case.
73162306a36Sopenharmony_ci	 * vhost_vsock_stop() can not fail in this case, so we don't need to
73262306a36Sopenharmony_ci	 * check the return code.
73362306a36Sopenharmony_ci	 */
73462306a36Sopenharmony_ci	vhost_vsock_stop(vsock, false);
73562306a36Sopenharmony_ci	vhost_vsock_flush(vsock);
73662306a36Sopenharmony_ci	vhost_dev_stop(&vsock->dev);
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ci	vhost_dev_cleanup(&vsock->dev);
74162306a36Sopenharmony_ci	kfree(vsock->dev.vqs);
74262306a36Sopenharmony_ci	vhost_vsock_free(vsock);
74362306a36Sopenharmony_ci	return 0;
74462306a36Sopenharmony_ci}
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_cistatic int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
74762306a36Sopenharmony_ci{
74862306a36Sopenharmony_ci	struct vhost_vsock *other;
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ci	/* Refuse reserved CIDs */
75162306a36Sopenharmony_ci	if (guest_cid <= VMADDR_CID_HOST ||
75262306a36Sopenharmony_ci	    guest_cid == U32_MAX)
75362306a36Sopenharmony_ci		return -EINVAL;
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	/* 64-bit CIDs are not yet supported */
75662306a36Sopenharmony_ci	if (guest_cid > U32_MAX)
75762306a36Sopenharmony_ci		return -EINVAL;
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci	/* Refuse if CID is assigned to the guest->host transport (i.e. nested
76062306a36Sopenharmony_ci	 * VM), to make the loopback work.
76162306a36Sopenharmony_ci	 */
76262306a36Sopenharmony_ci	if (vsock_find_cid(guest_cid))
76362306a36Sopenharmony_ci		return -EADDRINUSE;
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_ci	/* Refuse if CID is already in use */
76662306a36Sopenharmony_ci	mutex_lock(&vhost_vsock_mutex);
76762306a36Sopenharmony_ci	other = vhost_vsock_get(guest_cid);
76862306a36Sopenharmony_ci	if (other && other != vsock) {
76962306a36Sopenharmony_ci		mutex_unlock(&vhost_vsock_mutex);
77062306a36Sopenharmony_ci		return -EADDRINUSE;
77162306a36Sopenharmony_ci	}
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	if (vsock->guest_cid)
77462306a36Sopenharmony_ci		hash_del_rcu(&vsock->hash);
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	vsock->guest_cid = guest_cid;
77762306a36Sopenharmony_ci	hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
77862306a36Sopenharmony_ci	mutex_unlock(&vhost_vsock_mutex);
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci	return 0;
78162306a36Sopenharmony_ci}
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_cistatic int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
78462306a36Sopenharmony_ci{
78562306a36Sopenharmony_ci	struct vhost_virtqueue *vq;
78662306a36Sopenharmony_ci	int i;
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_ci	if (features & ~VHOST_VSOCK_FEATURES)
78962306a36Sopenharmony_ci		return -EOPNOTSUPP;
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci	mutex_lock(&vsock->dev.mutex);
79262306a36Sopenharmony_ci	if ((features & (1 << VHOST_F_LOG_ALL)) &&
79362306a36Sopenharmony_ci	    !vhost_log_access_ok(&vsock->dev)) {
79462306a36Sopenharmony_ci		goto err;
79562306a36Sopenharmony_ci	}
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
79862306a36Sopenharmony_ci		if (vhost_init_device_iotlb(&vsock->dev))
79962306a36Sopenharmony_ci			goto err;
80062306a36Sopenharmony_ci	}
80162306a36Sopenharmony_ci
80262306a36Sopenharmony_ci	if (features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
80362306a36Sopenharmony_ci		vsock->seqpacket_allow = true;
80462306a36Sopenharmony_ci
80562306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
80662306a36Sopenharmony_ci		vq = &vsock->vqs[i];
80762306a36Sopenharmony_ci		mutex_lock(&vq->mutex);
80862306a36Sopenharmony_ci		vq->acked_features = features;
80962306a36Sopenharmony_ci		mutex_unlock(&vq->mutex);
81062306a36Sopenharmony_ci	}
81162306a36Sopenharmony_ci	mutex_unlock(&vsock->dev.mutex);
81262306a36Sopenharmony_ci	return 0;
81362306a36Sopenharmony_ci
81462306a36Sopenharmony_cierr:
81562306a36Sopenharmony_ci	mutex_unlock(&vsock->dev.mutex);
81662306a36Sopenharmony_ci	return -EFAULT;
81762306a36Sopenharmony_ci}
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_cistatic long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
82062306a36Sopenharmony_ci				  unsigned long arg)
82162306a36Sopenharmony_ci{
82262306a36Sopenharmony_ci	struct vhost_vsock *vsock = f->private_data;
82362306a36Sopenharmony_ci	void __user *argp = (void __user *)arg;
82462306a36Sopenharmony_ci	u64 guest_cid;
82562306a36Sopenharmony_ci	u64 features;
82662306a36Sopenharmony_ci	int start;
82762306a36Sopenharmony_ci	int r;
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	switch (ioctl) {
83062306a36Sopenharmony_ci	case VHOST_VSOCK_SET_GUEST_CID:
83162306a36Sopenharmony_ci		if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
83262306a36Sopenharmony_ci			return -EFAULT;
83362306a36Sopenharmony_ci		return vhost_vsock_set_cid(vsock, guest_cid);
83462306a36Sopenharmony_ci	case VHOST_VSOCK_SET_RUNNING:
83562306a36Sopenharmony_ci		if (copy_from_user(&start, argp, sizeof(start)))
83662306a36Sopenharmony_ci			return -EFAULT;
83762306a36Sopenharmony_ci		if (start)
83862306a36Sopenharmony_ci			return vhost_vsock_start(vsock);
83962306a36Sopenharmony_ci		else
84062306a36Sopenharmony_ci			return vhost_vsock_stop(vsock, true);
84162306a36Sopenharmony_ci	case VHOST_GET_FEATURES:
84262306a36Sopenharmony_ci		features = VHOST_VSOCK_FEATURES;
84362306a36Sopenharmony_ci		if (copy_to_user(argp, &features, sizeof(features)))
84462306a36Sopenharmony_ci			return -EFAULT;
84562306a36Sopenharmony_ci		return 0;
84662306a36Sopenharmony_ci	case VHOST_SET_FEATURES:
84762306a36Sopenharmony_ci		if (copy_from_user(&features, argp, sizeof(features)))
84862306a36Sopenharmony_ci			return -EFAULT;
84962306a36Sopenharmony_ci		return vhost_vsock_set_features(vsock, features);
85062306a36Sopenharmony_ci	case VHOST_GET_BACKEND_FEATURES:
85162306a36Sopenharmony_ci		features = VHOST_VSOCK_BACKEND_FEATURES;
85262306a36Sopenharmony_ci		if (copy_to_user(argp, &features, sizeof(features)))
85362306a36Sopenharmony_ci			return -EFAULT;
85462306a36Sopenharmony_ci		return 0;
85562306a36Sopenharmony_ci	case VHOST_SET_BACKEND_FEATURES:
85662306a36Sopenharmony_ci		if (copy_from_user(&features, argp, sizeof(features)))
85762306a36Sopenharmony_ci			return -EFAULT;
85862306a36Sopenharmony_ci		if (features & ~VHOST_VSOCK_BACKEND_FEATURES)
85962306a36Sopenharmony_ci			return -EOPNOTSUPP;
86062306a36Sopenharmony_ci		vhost_set_backend_features(&vsock->dev, features);
86162306a36Sopenharmony_ci		return 0;
86262306a36Sopenharmony_ci	default:
86362306a36Sopenharmony_ci		mutex_lock(&vsock->dev.mutex);
86462306a36Sopenharmony_ci		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
86562306a36Sopenharmony_ci		if (r == -ENOIOCTLCMD)
86662306a36Sopenharmony_ci			r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
86762306a36Sopenharmony_ci		else
86862306a36Sopenharmony_ci			vhost_vsock_flush(vsock);
86962306a36Sopenharmony_ci		mutex_unlock(&vsock->dev.mutex);
87062306a36Sopenharmony_ci		return r;
87162306a36Sopenharmony_ci	}
87262306a36Sopenharmony_ci}
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_cistatic ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
87562306a36Sopenharmony_ci{
87662306a36Sopenharmony_ci	struct file *file = iocb->ki_filp;
87762306a36Sopenharmony_ci	struct vhost_vsock *vsock = file->private_data;
87862306a36Sopenharmony_ci	struct vhost_dev *dev = &vsock->dev;
87962306a36Sopenharmony_ci	int noblock = file->f_flags & O_NONBLOCK;
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_ci	return vhost_chr_read_iter(dev, to, noblock);
88262306a36Sopenharmony_ci}
88362306a36Sopenharmony_ci
88462306a36Sopenharmony_cistatic ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb,
88562306a36Sopenharmony_ci					struct iov_iter *from)
88662306a36Sopenharmony_ci{
88762306a36Sopenharmony_ci	struct file *file = iocb->ki_filp;
88862306a36Sopenharmony_ci	struct vhost_vsock *vsock = file->private_data;
88962306a36Sopenharmony_ci	struct vhost_dev *dev = &vsock->dev;
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_ci	return vhost_chr_write_iter(dev, from);
89262306a36Sopenharmony_ci}
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_cistatic __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait)
89562306a36Sopenharmony_ci{
89662306a36Sopenharmony_ci	struct vhost_vsock *vsock = file->private_data;
89762306a36Sopenharmony_ci	struct vhost_dev *dev = &vsock->dev;
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_ci	return vhost_chr_poll(file, dev, wait);
90062306a36Sopenharmony_ci}
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_cistatic const struct file_operations vhost_vsock_fops = {
90362306a36Sopenharmony_ci	.owner          = THIS_MODULE,
90462306a36Sopenharmony_ci	.open           = vhost_vsock_dev_open,
90562306a36Sopenharmony_ci	.release        = vhost_vsock_dev_release,
90662306a36Sopenharmony_ci	.llseek		= noop_llseek,
90762306a36Sopenharmony_ci	.unlocked_ioctl = vhost_vsock_dev_ioctl,
90862306a36Sopenharmony_ci	.compat_ioctl   = compat_ptr_ioctl,
90962306a36Sopenharmony_ci	.read_iter      = vhost_vsock_chr_read_iter,
91062306a36Sopenharmony_ci	.write_iter     = vhost_vsock_chr_write_iter,
91162306a36Sopenharmony_ci	.poll           = vhost_vsock_chr_poll,
91262306a36Sopenharmony_ci};
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_cistatic struct miscdevice vhost_vsock_misc = {
91562306a36Sopenharmony_ci	.minor = VHOST_VSOCK_MINOR,
91662306a36Sopenharmony_ci	.name = "vhost-vsock",
91762306a36Sopenharmony_ci	.fops = &vhost_vsock_fops,
91862306a36Sopenharmony_ci};
91962306a36Sopenharmony_ci
92062306a36Sopenharmony_cistatic int __init vhost_vsock_init(void)
92162306a36Sopenharmony_ci{
92262306a36Sopenharmony_ci	int ret;
92362306a36Sopenharmony_ci
92462306a36Sopenharmony_ci	ret = vsock_core_register(&vhost_transport.transport,
92562306a36Sopenharmony_ci				  VSOCK_TRANSPORT_F_H2G);
92662306a36Sopenharmony_ci	if (ret < 0)
92762306a36Sopenharmony_ci		return ret;
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_ci	ret = misc_register(&vhost_vsock_misc);
93062306a36Sopenharmony_ci	if (ret) {
93162306a36Sopenharmony_ci		vsock_core_unregister(&vhost_transport.transport);
93262306a36Sopenharmony_ci		return ret;
93362306a36Sopenharmony_ci	}
93462306a36Sopenharmony_ci
93562306a36Sopenharmony_ci	return 0;
93662306a36Sopenharmony_ci};
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_cistatic void __exit vhost_vsock_exit(void)
93962306a36Sopenharmony_ci{
94062306a36Sopenharmony_ci	misc_deregister(&vhost_vsock_misc);
94162306a36Sopenharmony_ci	vsock_core_unregister(&vhost_transport.transport);
94262306a36Sopenharmony_ci};
94362306a36Sopenharmony_ci
/* Module entry/exit points and metadata.  The two aliases name the module
 * by its misc minor and by its "vhost-vsock" device name.
 */
module_init(vhost_vsock_init);
module_exit(vhost_vsock_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("vhost transport for vsock ");
MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
MODULE_ALIAS("devname:vhost-vsock");
951