// SPDX-License-Identifier: GPL-2.0-only
/*
 * vhost transport for vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
#include <linux/miscdevice.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <net/sock.h>
#include <linux/virtio_vsock.h>
#include <linux/vhost.h>
#include <linux/hashtable.h>

#include <net/af_vsock.h>
#include "vhost.h"

228c2ecf20Sopenharmony_ci#define VHOST_VSOCK_DEFAULT_HOST_CID	2
238c2ecf20Sopenharmony_ci/* Max number of bytes transferred before requeueing the job.
248c2ecf20Sopenharmony_ci * Using this limit prevents one virtqueue from starving others. */
258c2ecf20Sopenharmony_ci#define VHOST_VSOCK_WEIGHT 0x80000
268c2ecf20Sopenharmony_ci/* Max number of packets transferred before requeueing the job.
278c2ecf20Sopenharmony_ci * Using this limit prevents one virtqueue from starving others with
288c2ecf20Sopenharmony_ci * small pkts.
298c2ecf20Sopenharmony_ci */
308c2ecf20Sopenharmony_ci#define VHOST_VSOCK_PKT_WEIGHT 256
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_cienum {
338c2ecf20Sopenharmony_ci	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
348c2ecf20Sopenharmony_ci};
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci/* Used to track all the vhost_vsock instances on the system. */
378c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(vhost_vsock_mutex);
388c2ecf20Sopenharmony_cistatic DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_cistruct vhost_vsock {
418c2ecf20Sopenharmony_ci	struct vhost_dev dev;
428c2ecf20Sopenharmony_ci	struct vhost_virtqueue vqs[2];
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ci	/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
458c2ecf20Sopenharmony_ci	struct hlist_node hash;
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci	struct vhost_work send_pkt_work;
488c2ecf20Sopenharmony_ci	spinlock_t send_pkt_list_lock;
498c2ecf20Sopenharmony_ci	struct list_head send_pkt_list;	/* host->guest pending packets */
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci	atomic_t queued_replies;
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ci	u32 guest_cid;
548c2ecf20Sopenharmony_ci};
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_cistatic u32 vhost_transport_get_local_cid(void)
578c2ecf20Sopenharmony_ci{
588c2ecf20Sopenharmony_ci	return VHOST_VSOCK_DEFAULT_HOST_CID;
598c2ecf20Sopenharmony_ci}
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci/* Callers that dereference the return value must hold vhost_vsock_mutex or the
628c2ecf20Sopenharmony_ci * RCU read lock.
638c2ecf20Sopenharmony_ci */
648c2ecf20Sopenharmony_cistatic struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
658c2ecf20Sopenharmony_ci{
668c2ecf20Sopenharmony_ci	struct vhost_vsock *vsock;
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
698c2ecf20Sopenharmony_ci		u32 other_cid = vsock->guest_cid;
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci		/* Skip instances that have no CID yet */
728c2ecf20Sopenharmony_ci		if (other_cid == 0)
738c2ecf20Sopenharmony_ci			continue;
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_ci		if (other_cid == guest_cid)
768c2ecf20Sopenharmony_ci			return vsock;
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	}
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	return NULL;
818c2ecf20Sopenharmony_ci}
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_cistatic void
848c2ecf20Sopenharmony_civhost_transport_do_send_pkt(struct vhost_vsock *vsock,
858c2ecf20Sopenharmony_ci			    struct vhost_virtqueue *vq)
868c2ecf20Sopenharmony_ci{
878c2ecf20Sopenharmony_ci	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
888c2ecf20Sopenharmony_ci	int pkts = 0, total_len = 0;
898c2ecf20Sopenharmony_ci	bool added = false;
908c2ecf20Sopenharmony_ci	bool restart_tx = false;
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci	mutex_lock(&vq->mutex);
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	if (!vhost_vq_get_backend(vq))
958c2ecf20Sopenharmony_ci		goto out;
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci	/* Avoid further vmexits, we're already processing the virtqueue */
988c2ecf20Sopenharmony_ci	vhost_disable_notify(&vsock->dev, vq);
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	do {
1018c2ecf20Sopenharmony_ci		struct virtio_vsock_pkt *pkt;
1028c2ecf20Sopenharmony_ci		struct iov_iter iov_iter;
1038c2ecf20Sopenharmony_ci		unsigned out, in;
1048c2ecf20Sopenharmony_ci		size_t nbytes;
1058c2ecf20Sopenharmony_ci		size_t iov_len, payload_len;
1068c2ecf20Sopenharmony_ci		int head;
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci		spin_lock_bh(&vsock->send_pkt_list_lock);
1098c2ecf20Sopenharmony_ci		if (list_empty(&vsock->send_pkt_list)) {
1108c2ecf20Sopenharmony_ci			spin_unlock_bh(&vsock->send_pkt_list_lock);
1118c2ecf20Sopenharmony_ci			vhost_enable_notify(&vsock->dev, vq);
1128c2ecf20Sopenharmony_ci			break;
1138c2ecf20Sopenharmony_ci		}
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci		pkt = list_first_entry(&vsock->send_pkt_list,
1168c2ecf20Sopenharmony_ci				       struct virtio_vsock_pkt, list);
1178c2ecf20Sopenharmony_ci		list_del_init(&pkt->list);
1188c2ecf20Sopenharmony_ci		spin_unlock_bh(&vsock->send_pkt_list_lock);
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
1218c2ecf20Sopenharmony_ci					 &out, &in, NULL, NULL);
1228c2ecf20Sopenharmony_ci		if (head < 0) {
1238c2ecf20Sopenharmony_ci			spin_lock_bh(&vsock->send_pkt_list_lock);
1248c2ecf20Sopenharmony_ci			list_add(&pkt->list, &vsock->send_pkt_list);
1258c2ecf20Sopenharmony_ci			spin_unlock_bh(&vsock->send_pkt_list_lock);
1268c2ecf20Sopenharmony_ci			break;
1278c2ecf20Sopenharmony_ci		}
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci		if (head == vq->num) {
1308c2ecf20Sopenharmony_ci			spin_lock_bh(&vsock->send_pkt_list_lock);
1318c2ecf20Sopenharmony_ci			list_add(&pkt->list, &vsock->send_pkt_list);
1328c2ecf20Sopenharmony_ci			spin_unlock_bh(&vsock->send_pkt_list_lock);
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci			/* We cannot finish yet if more buffers snuck in while
1358c2ecf20Sopenharmony_ci			 * re-enabling notify.
1368c2ecf20Sopenharmony_ci			 */
1378c2ecf20Sopenharmony_ci			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
1388c2ecf20Sopenharmony_ci				vhost_disable_notify(&vsock->dev, vq);
1398c2ecf20Sopenharmony_ci				continue;
1408c2ecf20Sopenharmony_ci			}
1418c2ecf20Sopenharmony_ci			break;
1428c2ecf20Sopenharmony_ci		}
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci		if (out) {
1458c2ecf20Sopenharmony_ci			virtio_transport_free_pkt(pkt);
1468c2ecf20Sopenharmony_ci			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
1478c2ecf20Sopenharmony_ci			break;
1488c2ecf20Sopenharmony_ci		}
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci		iov_len = iov_length(&vq->iov[out], in);
1518c2ecf20Sopenharmony_ci		if (iov_len < sizeof(pkt->hdr)) {
1528c2ecf20Sopenharmony_ci			virtio_transport_free_pkt(pkt);
1538c2ecf20Sopenharmony_ci			vq_err(vq, "Buffer len [%zu] too small\n", iov_len);
1548c2ecf20Sopenharmony_ci			break;
1558c2ecf20Sopenharmony_ci		}
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len);
1588c2ecf20Sopenharmony_ci		payload_len = pkt->len - pkt->off;
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci		/* If the packet is greater than the space available in the
1618c2ecf20Sopenharmony_ci		 * buffer, we split it using multiple buffers.
1628c2ecf20Sopenharmony_ci		 */
1638c2ecf20Sopenharmony_ci		if (payload_len > iov_len - sizeof(pkt->hdr))
1648c2ecf20Sopenharmony_ci			payload_len = iov_len - sizeof(pkt->hdr);
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci		/* Set the correct length in the header */
1678c2ecf20Sopenharmony_ci		pkt->hdr.len = cpu_to_le32(payload_len);
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
1708c2ecf20Sopenharmony_ci		if (nbytes != sizeof(pkt->hdr)) {
1718c2ecf20Sopenharmony_ci			virtio_transport_free_pkt(pkt);
1728c2ecf20Sopenharmony_ci			vq_err(vq, "Faulted on copying pkt hdr\n");
1738c2ecf20Sopenharmony_ci			break;
1748c2ecf20Sopenharmony_ci		}
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci		nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len,
1778c2ecf20Sopenharmony_ci				      &iov_iter);
1788c2ecf20Sopenharmony_ci		if (nbytes != payload_len) {
1798c2ecf20Sopenharmony_ci			virtio_transport_free_pkt(pkt);
1808c2ecf20Sopenharmony_ci			vq_err(vq, "Faulted on copying pkt buf\n");
1818c2ecf20Sopenharmony_ci			break;
1828c2ecf20Sopenharmony_ci		}
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci		/* Deliver to monitoring devices all packets that we
1858c2ecf20Sopenharmony_ci		 * will transmit.
1868c2ecf20Sopenharmony_ci		 */
1878c2ecf20Sopenharmony_ci		virtio_transport_deliver_tap_pkt(pkt);
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci		vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
1908c2ecf20Sopenharmony_ci		added = true;
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci		pkt->off += payload_len;
1938c2ecf20Sopenharmony_ci		total_len += payload_len;
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci		/* If we didn't send all the payload we can requeue the packet
1968c2ecf20Sopenharmony_ci		 * to send it with the next available buffer.
1978c2ecf20Sopenharmony_ci		 */
1988c2ecf20Sopenharmony_ci		if (pkt->off < pkt->len) {
1998c2ecf20Sopenharmony_ci			/* We are queueing the same virtio_vsock_pkt to handle
2008c2ecf20Sopenharmony_ci			 * the remaining bytes, and we want to deliver it
2018c2ecf20Sopenharmony_ci			 * to monitoring devices in the next iteration.
2028c2ecf20Sopenharmony_ci			 */
2038c2ecf20Sopenharmony_ci			pkt->tap_delivered = false;
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_ci			spin_lock_bh(&vsock->send_pkt_list_lock);
2068c2ecf20Sopenharmony_ci			list_add(&pkt->list, &vsock->send_pkt_list);
2078c2ecf20Sopenharmony_ci			spin_unlock_bh(&vsock->send_pkt_list_lock);
2088c2ecf20Sopenharmony_ci		} else {
2098c2ecf20Sopenharmony_ci			if (pkt->reply) {
2108c2ecf20Sopenharmony_ci				int val;
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci				val = atomic_dec_return(&vsock->queued_replies);
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci				/* Do we have resources to resume tx
2158c2ecf20Sopenharmony_ci				 * processing?
2168c2ecf20Sopenharmony_ci				 */
2178c2ecf20Sopenharmony_ci				if (val + 1 == tx_vq->num)
2188c2ecf20Sopenharmony_ci					restart_tx = true;
2198c2ecf20Sopenharmony_ci			}
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci			virtio_transport_free_pkt(pkt);
2228c2ecf20Sopenharmony_ci		}
2238c2ecf20Sopenharmony_ci	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
2248c2ecf20Sopenharmony_ci	if (added)
2258c2ecf20Sopenharmony_ci		vhost_signal(&vsock->dev, vq);
2268c2ecf20Sopenharmony_ci
2278c2ecf20Sopenharmony_ciout:
2288c2ecf20Sopenharmony_ci	mutex_unlock(&vq->mutex);
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	if (restart_tx)
2318c2ecf20Sopenharmony_ci		vhost_poll_queue(&tx_vq->poll);
2328c2ecf20Sopenharmony_ci}
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_cistatic void vhost_transport_send_pkt_work(struct vhost_work *work)
2358c2ecf20Sopenharmony_ci{
2368c2ecf20Sopenharmony_ci	struct vhost_virtqueue *vq;
2378c2ecf20Sopenharmony_ci	struct vhost_vsock *vsock;
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	vsock = container_of(work, struct vhost_vsock, send_pkt_work);
2408c2ecf20Sopenharmony_ci	vq = &vsock->vqs[VSOCK_VQ_RX];
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	vhost_transport_do_send_pkt(vsock, vq);
2438c2ecf20Sopenharmony_ci}
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_cistatic int
2468c2ecf20Sopenharmony_civhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
2478c2ecf20Sopenharmony_ci{
2488c2ecf20Sopenharmony_ci	struct vhost_vsock *vsock;
2498c2ecf20Sopenharmony_ci	int len = pkt->len;
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci	rcu_read_lock();
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_ci	/* Find the vhost_vsock according to guest context id  */
2548c2ecf20Sopenharmony_ci	vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
2558c2ecf20Sopenharmony_ci	if (!vsock) {
2568c2ecf20Sopenharmony_ci		rcu_read_unlock();
2578c2ecf20Sopenharmony_ci		virtio_transport_free_pkt(pkt);
2588c2ecf20Sopenharmony_ci		return -ENODEV;
2598c2ecf20Sopenharmony_ci	}
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	if (pkt->reply)
2628c2ecf20Sopenharmony_ci		atomic_inc(&vsock->queued_replies);
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci	spin_lock_bh(&vsock->send_pkt_list_lock);
2658c2ecf20Sopenharmony_ci	list_add_tail(&pkt->list, &vsock->send_pkt_list);
2668c2ecf20Sopenharmony_ci	spin_unlock_bh(&vsock->send_pkt_list_lock);
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_ci	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ci	rcu_read_unlock();
2718c2ecf20Sopenharmony_ci	return len;
2728c2ecf20Sopenharmony_ci}
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_cistatic int
2758c2ecf20Sopenharmony_civhost_transport_cancel_pkt(struct vsock_sock *vsk)
2768c2ecf20Sopenharmony_ci{
2778c2ecf20Sopenharmony_ci	struct vhost_vsock *vsock;
2788c2ecf20Sopenharmony_ci	struct virtio_vsock_pkt *pkt, *n;
2798c2ecf20Sopenharmony_ci	int cnt = 0;
2808c2ecf20Sopenharmony_ci	int ret = -ENODEV;
2818c2ecf20Sopenharmony_ci	LIST_HEAD(freeme);
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	rcu_read_lock();
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci	/* Find the vhost_vsock according to guest context id  */
2868c2ecf20Sopenharmony_ci	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
2878c2ecf20Sopenharmony_ci	if (!vsock)
2888c2ecf20Sopenharmony_ci		goto out;
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci	spin_lock_bh(&vsock->send_pkt_list_lock);
2918c2ecf20Sopenharmony_ci	list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
2928c2ecf20Sopenharmony_ci		if (pkt->vsk != vsk)
2938c2ecf20Sopenharmony_ci			continue;
2948c2ecf20Sopenharmony_ci		list_move(&pkt->list, &freeme);
2958c2ecf20Sopenharmony_ci	}
2968c2ecf20Sopenharmony_ci	spin_unlock_bh(&vsock->send_pkt_list_lock);
2978c2ecf20Sopenharmony_ci
2988c2ecf20Sopenharmony_ci	list_for_each_entry_safe(pkt, n, &freeme, list) {
2998c2ecf20Sopenharmony_ci		if (pkt->reply)
3008c2ecf20Sopenharmony_ci			cnt++;
3018c2ecf20Sopenharmony_ci		list_del(&pkt->list);
3028c2ecf20Sopenharmony_ci		virtio_transport_free_pkt(pkt);
3038c2ecf20Sopenharmony_ci	}
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_ci	if (cnt) {
3068c2ecf20Sopenharmony_ci		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
3078c2ecf20Sopenharmony_ci		int new_cnt;
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
3108c2ecf20Sopenharmony_ci		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
3118c2ecf20Sopenharmony_ci			vhost_poll_queue(&tx_vq->poll);
3128c2ecf20Sopenharmony_ci	}
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci	ret = 0;
3158c2ecf20Sopenharmony_ciout:
3168c2ecf20Sopenharmony_ci	rcu_read_unlock();
3178c2ecf20Sopenharmony_ci	return ret;
3188c2ecf20Sopenharmony_ci}
3198c2ecf20Sopenharmony_ci
3208c2ecf20Sopenharmony_cistatic struct virtio_vsock_pkt *
3218c2ecf20Sopenharmony_civhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
3228c2ecf20Sopenharmony_ci		      unsigned int out, unsigned int in)
3238c2ecf20Sopenharmony_ci{
3248c2ecf20Sopenharmony_ci	struct virtio_vsock_pkt *pkt;
3258c2ecf20Sopenharmony_ci	struct iov_iter iov_iter;
3268c2ecf20Sopenharmony_ci	size_t nbytes;
3278c2ecf20Sopenharmony_ci	size_t len;
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ci	if (in != 0) {
3308c2ecf20Sopenharmony_ci		vq_err(vq, "Expected 0 input buffers, got %u\n", in);
3318c2ecf20Sopenharmony_ci		return NULL;
3328c2ecf20Sopenharmony_ci	}
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
3358c2ecf20Sopenharmony_ci	if (!pkt)
3368c2ecf20Sopenharmony_ci		return NULL;
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci	len = iov_length(vq->iov, out);
3398c2ecf20Sopenharmony_ci	iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
3428c2ecf20Sopenharmony_ci	if (nbytes != sizeof(pkt->hdr)) {
3438c2ecf20Sopenharmony_ci		vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
3448c2ecf20Sopenharmony_ci		       sizeof(pkt->hdr), nbytes);
3458c2ecf20Sopenharmony_ci		kfree(pkt);
3468c2ecf20Sopenharmony_ci		return NULL;
3478c2ecf20Sopenharmony_ci	}
3488c2ecf20Sopenharmony_ci
3498c2ecf20Sopenharmony_ci	if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
3508c2ecf20Sopenharmony_ci		pkt->len = le32_to_cpu(pkt->hdr.len);
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci	/* No payload */
3538c2ecf20Sopenharmony_ci	if (!pkt->len)
3548c2ecf20Sopenharmony_ci		return pkt;
3558c2ecf20Sopenharmony_ci
3568c2ecf20Sopenharmony_ci	/* The pkt is too big */
3578c2ecf20Sopenharmony_ci	if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
3588c2ecf20Sopenharmony_ci		kfree(pkt);
3598c2ecf20Sopenharmony_ci		return NULL;
3608c2ecf20Sopenharmony_ci	}
3618c2ecf20Sopenharmony_ci
3628c2ecf20Sopenharmony_ci	pkt->buf = kvmalloc(pkt->len, GFP_KERNEL);
3638c2ecf20Sopenharmony_ci	if (!pkt->buf) {
3648c2ecf20Sopenharmony_ci		kfree(pkt);
3658c2ecf20Sopenharmony_ci		return NULL;
3668c2ecf20Sopenharmony_ci	}
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_ci	pkt->buf_len = pkt->len;
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
3718c2ecf20Sopenharmony_ci	if (nbytes != pkt->len) {
3728c2ecf20Sopenharmony_ci		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
3738c2ecf20Sopenharmony_ci		       pkt->len, nbytes);
3748c2ecf20Sopenharmony_ci		virtio_transport_free_pkt(pkt);
3758c2ecf20Sopenharmony_ci		return NULL;
3768c2ecf20Sopenharmony_ci	}
3778c2ecf20Sopenharmony_ci
3788c2ecf20Sopenharmony_ci	return pkt;
3798c2ecf20Sopenharmony_ci}
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci/* Is there space left for replies to rx packets? */
3828c2ecf20Sopenharmony_cistatic bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
3838c2ecf20Sopenharmony_ci{
3848c2ecf20Sopenharmony_ci	struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
3858c2ecf20Sopenharmony_ci	int val;
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_ci	smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
3888c2ecf20Sopenharmony_ci	val = atomic_read(&vsock->queued_replies);
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	return val < vq->num;
3918c2ecf20Sopenharmony_ci}
3928c2ecf20Sopenharmony_ci
3938c2ecf20Sopenharmony_cistatic struct virtio_transport vhost_transport = {
3948c2ecf20Sopenharmony_ci	.transport = {
3958c2ecf20Sopenharmony_ci		.module                   = THIS_MODULE,
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci		.get_local_cid            = vhost_transport_get_local_cid,
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci		.init                     = virtio_transport_do_socket_init,
4008c2ecf20Sopenharmony_ci		.destruct                 = virtio_transport_destruct,
4018c2ecf20Sopenharmony_ci		.release                  = virtio_transport_release,
4028c2ecf20Sopenharmony_ci		.connect                  = virtio_transport_connect,
4038c2ecf20Sopenharmony_ci		.shutdown                 = virtio_transport_shutdown,
4048c2ecf20Sopenharmony_ci		.cancel_pkt               = vhost_transport_cancel_pkt,
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ci		.dgram_enqueue            = virtio_transport_dgram_enqueue,
4078c2ecf20Sopenharmony_ci		.dgram_dequeue            = virtio_transport_dgram_dequeue,
4088c2ecf20Sopenharmony_ci		.dgram_bind               = virtio_transport_dgram_bind,
4098c2ecf20Sopenharmony_ci		.dgram_allow              = virtio_transport_dgram_allow,
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ci		.stream_enqueue           = virtio_transport_stream_enqueue,
4128c2ecf20Sopenharmony_ci		.stream_dequeue           = virtio_transport_stream_dequeue,
4138c2ecf20Sopenharmony_ci		.stream_has_data          = virtio_transport_stream_has_data,
4148c2ecf20Sopenharmony_ci		.stream_has_space         = virtio_transport_stream_has_space,
4158c2ecf20Sopenharmony_ci		.stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
4168c2ecf20Sopenharmony_ci		.stream_is_active         = virtio_transport_stream_is_active,
4178c2ecf20Sopenharmony_ci		.stream_allow             = virtio_transport_stream_allow,
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ci		.notify_poll_in           = virtio_transport_notify_poll_in,
4208c2ecf20Sopenharmony_ci		.notify_poll_out          = virtio_transport_notify_poll_out,
4218c2ecf20Sopenharmony_ci		.notify_recv_init         = virtio_transport_notify_recv_init,
4228c2ecf20Sopenharmony_ci		.notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
4238c2ecf20Sopenharmony_ci		.notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
4248c2ecf20Sopenharmony_ci		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
4258c2ecf20Sopenharmony_ci		.notify_send_init         = virtio_transport_notify_send_init,
4268c2ecf20Sopenharmony_ci		.notify_send_pre_block    = virtio_transport_notify_send_pre_block,
4278c2ecf20Sopenharmony_ci		.notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
4288c2ecf20Sopenharmony_ci		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
4298c2ecf20Sopenharmony_ci		.notify_buffer_size       = virtio_transport_notify_buffer_size,
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci	},
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci	.send_pkt = vhost_transport_send_pkt,
4348c2ecf20Sopenharmony_ci};
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_cistatic void vhost_vsock_handle_tx_kick(struct vhost_work *work)
4378c2ecf20Sopenharmony_ci{
4388c2ecf20Sopenharmony_ci	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
4398c2ecf20Sopenharmony_ci						  poll.work);
4408c2ecf20Sopenharmony_ci	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
4418c2ecf20Sopenharmony_ci						 dev);
4428c2ecf20Sopenharmony_ci	struct virtio_vsock_pkt *pkt;
4438c2ecf20Sopenharmony_ci	int head, pkts = 0, total_len = 0;
4448c2ecf20Sopenharmony_ci	unsigned int out, in;
4458c2ecf20Sopenharmony_ci	bool added = false;
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_ci	mutex_lock(&vq->mutex);
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_ci	if (!vhost_vq_get_backend(vq))
4508c2ecf20Sopenharmony_ci		goto out;
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci	vhost_disable_notify(&vsock->dev, vq);
4538c2ecf20Sopenharmony_ci	do {
4548c2ecf20Sopenharmony_ci		u32 len;
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_ci		if (!vhost_vsock_more_replies(vsock)) {
4578c2ecf20Sopenharmony_ci			/* Stop tx until the device processes already
4588c2ecf20Sopenharmony_ci			 * pending replies.  Leave tx virtqueue
4598c2ecf20Sopenharmony_ci			 * callbacks disabled.
4608c2ecf20Sopenharmony_ci			 */
4618c2ecf20Sopenharmony_ci			goto no_more_replies;
4628c2ecf20Sopenharmony_ci		}
4638c2ecf20Sopenharmony_ci
4648c2ecf20Sopenharmony_ci		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
4658c2ecf20Sopenharmony_ci					 &out, &in, NULL, NULL);
4668c2ecf20Sopenharmony_ci		if (head < 0)
4678c2ecf20Sopenharmony_ci			break;
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci		if (head == vq->num) {
4708c2ecf20Sopenharmony_ci			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
4718c2ecf20Sopenharmony_ci				vhost_disable_notify(&vsock->dev, vq);
4728c2ecf20Sopenharmony_ci				continue;
4738c2ecf20Sopenharmony_ci			}
4748c2ecf20Sopenharmony_ci			break;
4758c2ecf20Sopenharmony_ci		}
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci		pkt = vhost_vsock_alloc_pkt(vq, out, in);
4788c2ecf20Sopenharmony_ci		if (!pkt) {
4798c2ecf20Sopenharmony_ci			vq_err(vq, "Faulted on pkt\n");
4808c2ecf20Sopenharmony_ci			continue;
4818c2ecf20Sopenharmony_ci		}
4828c2ecf20Sopenharmony_ci
4838c2ecf20Sopenharmony_ci		len = pkt->len;
4848c2ecf20Sopenharmony_ci
4858c2ecf20Sopenharmony_ci		/* Deliver to monitoring devices all received packets */
4868c2ecf20Sopenharmony_ci		virtio_transport_deliver_tap_pkt(pkt);
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_ci		/* Only accept correctly addressed packets */
4898c2ecf20Sopenharmony_ci		if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid &&
4908c2ecf20Sopenharmony_ci		    le64_to_cpu(pkt->hdr.dst_cid) ==
4918c2ecf20Sopenharmony_ci		    vhost_transport_get_local_cid())
4928c2ecf20Sopenharmony_ci			virtio_transport_recv_pkt(&vhost_transport, pkt);
4938c2ecf20Sopenharmony_ci		else
4948c2ecf20Sopenharmony_ci			virtio_transport_free_pkt(pkt);
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci		len += sizeof(pkt->hdr);
4978c2ecf20Sopenharmony_ci		vhost_add_used(vq, head, 0);
4988c2ecf20Sopenharmony_ci		total_len += len;
4998c2ecf20Sopenharmony_ci		added = true;
5008c2ecf20Sopenharmony_ci	} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
5018c2ecf20Sopenharmony_ci
5028c2ecf20Sopenharmony_cino_more_replies:
5038c2ecf20Sopenharmony_ci	if (added)
5048c2ecf20Sopenharmony_ci		vhost_signal(&vsock->dev, vq);
5058c2ecf20Sopenharmony_ci
5068c2ecf20Sopenharmony_ciout:
5078c2ecf20Sopenharmony_ci	mutex_unlock(&vq->mutex);
5088c2ecf20Sopenharmony_ci}
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_cistatic void vhost_vsock_handle_rx_kick(struct vhost_work *work)
5118c2ecf20Sopenharmony_ci{
5128c2ecf20Sopenharmony_ci	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
5138c2ecf20Sopenharmony_ci						poll.work);
5148c2ecf20Sopenharmony_ci	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
5158c2ecf20Sopenharmony_ci						 dev);
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci	vhost_transport_do_send_pkt(vsock, vq);
5188c2ecf20Sopenharmony_ci}
5198c2ecf20Sopenharmony_ci
5208c2ecf20Sopenharmony_cistatic int vhost_vsock_start(struct vhost_vsock *vsock)
5218c2ecf20Sopenharmony_ci{
5228c2ecf20Sopenharmony_ci	struct vhost_virtqueue *vq;
5238c2ecf20Sopenharmony_ci	size_t i;
5248c2ecf20Sopenharmony_ci	int ret;
5258c2ecf20Sopenharmony_ci
5268c2ecf20Sopenharmony_ci	mutex_lock(&vsock->dev.mutex);
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_ci	ret = vhost_dev_check_owner(&vsock->dev);
5298c2ecf20Sopenharmony_ci	if (ret)
5308c2ecf20Sopenharmony_ci		goto err;
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
5338c2ecf20Sopenharmony_ci		vq = &vsock->vqs[i];
5348c2ecf20Sopenharmony_ci
5358c2ecf20Sopenharmony_ci		mutex_lock(&vq->mutex);
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_ci		if (!vhost_vq_access_ok(vq)) {
5388c2ecf20Sopenharmony_ci			ret = -EFAULT;
5398c2ecf20Sopenharmony_ci			goto err_vq;
5408c2ecf20Sopenharmony_ci		}
5418c2ecf20Sopenharmony_ci
5428c2ecf20Sopenharmony_ci		if (!vhost_vq_get_backend(vq)) {
5438c2ecf20Sopenharmony_ci			vhost_vq_set_backend(vq, vsock);
5448c2ecf20Sopenharmony_ci			ret = vhost_vq_init_access(vq);
5458c2ecf20Sopenharmony_ci			if (ret)
5468c2ecf20Sopenharmony_ci				goto err_vq;
5478c2ecf20Sopenharmony_ci		}
5488c2ecf20Sopenharmony_ci
5498c2ecf20Sopenharmony_ci		mutex_unlock(&vq->mutex);
5508c2ecf20Sopenharmony_ci	}
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci	/* Some packets may have been queued before the device was started,
5538c2ecf20Sopenharmony_ci	 * let's kick the send worker to send them.
5548c2ecf20Sopenharmony_ci	 */
5558c2ecf20Sopenharmony_ci	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
5568c2ecf20Sopenharmony_ci
5578c2ecf20Sopenharmony_ci	mutex_unlock(&vsock->dev.mutex);
5588c2ecf20Sopenharmony_ci	return 0;
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_cierr_vq:
5618c2ecf20Sopenharmony_ci	vhost_vq_set_backend(vq, NULL);
5628c2ecf20Sopenharmony_ci	mutex_unlock(&vq->mutex);
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
5658c2ecf20Sopenharmony_ci		vq = &vsock->vqs[i];
5668c2ecf20Sopenharmony_ci
5678c2ecf20Sopenharmony_ci		mutex_lock(&vq->mutex);
5688c2ecf20Sopenharmony_ci		vhost_vq_set_backend(vq, NULL);
5698c2ecf20Sopenharmony_ci		mutex_unlock(&vq->mutex);
5708c2ecf20Sopenharmony_ci	}
5718c2ecf20Sopenharmony_cierr:
5728c2ecf20Sopenharmony_ci	mutex_unlock(&vsock->dev.mutex);
5738c2ecf20Sopenharmony_ci	return ret;
5748c2ecf20Sopenharmony_ci}
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_cistatic int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
5778c2ecf20Sopenharmony_ci{
5788c2ecf20Sopenharmony_ci	size_t i;
5798c2ecf20Sopenharmony_ci	int ret = 0;
5808c2ecf20Sopenharmony_ci
5818c2ecf20Sopenharmony_ci	mutex_lock(&vsock->dev.mutex);
5828c2ecf20Sopenharmony_ci
5838c2ecf20Sopenharmony_ci	if (check_owner) {
5848c2ecf20Sopenharmony_ci		ret = vhost_dev_check_owner(&vsock->dev);
5858c2ecf20Sopenharmony_ci		if (ret)
5868c2ecf20Sopenharmony_ci			goto err;
5878c2ecf20Sopenharmony_ci	}
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
5908c2ecf20Sopenharmony_ci		struct vhost_virtqueue *vq = &vsock->vqs[i];
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_ci		mutex_lock(&vq->mutex);
5938c2ecf20Sopenharmony_ci		vhost_vq_set_backend(vq, NULL);
5948c2ecf20Sopenharmony_ci		mutex_unlock(&vq->mutex);
5958c2ecf20Sopenharmony_ci	}
5968c2ecf20Sopenharmony_ci
5978c2ecf20Sopenharmony_cierr:
5988c2ecf20Sopenharmony_ci	mutex_unlock(&vsock->dev.mutex);
5998c2ecf20Sopenharmony_ci	return ret;
6008c2ecf20Sopenharmony_ci}
6018c2ecf20Sopenharmony_ci
/* Release the memory of @vsock.
 *
 * kvfree() matches the kvmalloc() used in vhost_vsock_dev_open().
 */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}

6078c2ecf20Sopenharmony_cistatic int vhost_vsock_dev_open(struct inode *inode, struct file *file)
6088c2ecf20Sopenharmony_ci{
6098c2ecf20Sopenharmony_ci	struct vhost_virtqueue **vqs;
6108c2ecf20Sopenharmony_ci	struct vhost_vsock *vsock;
6118c2ecf20Sopenharmony_ci	int ret;
6128c2ecf20Sopenharmony_ci
6138c2ecf20Sopenharmony_ci	/* This struct is large and allocation could fail, fall back to vmalloc
6148c2ecf20Sopenharmony_ci	 * if there is no other way.
6158c2ecf20Sopenharmony_ci	 */
6168c2ecf20Sopenharmony_ci	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
6178c2ecf20Sopenharmony_ci	if (!vsock)
6188c2ecf20Sopenharmony_ci		return -ENOMEM;
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci	vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
6218c2ecf20Sopenharmony_ci	if (!vqs) {
6228c2ecf20Sopenharmony_ci		ret = -ENOMEM;
6238c2ecf20Sopenharmony_ci		goto out;
6248c2ecf20Sopenharmony_ci	}
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci	vsock->guest_cid = 0; /* no CID assigned yet */
6278c2ecf20Sopenharmony_ci
6288c2ecf20Sopenharmony_ci	atomic_set(&vsock->queued_replies, 0);
6298c2ecf20Sopenharmony_ci
6308c2ecf20Sopenharmony_ci	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
6318c2ecf20Sopenharmony_ci	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
6328c2ecf20Sopenharmony_ci	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
6338c2ecf20Sopenharmony_ci	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_ci	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
6368c2ecf20Sopenharmony_ci		       UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
6378c2ecf20Sopenharmony_ci		       VHOST_VSOCK_WEIGHT, true, NULL);
6388c2ecf20Sopenharmony_ci
6398c2ecf20Sopenharmony_ci	file->private_data = vsock;
6408c2ecf20Sopenharmony_ci	spin_lock_init(&vsock->send_pkt_list_lock);
6418c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&vsock->send_pkt_list);
6428c2ecf20Sopenharmony_ci	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
6438c2ecf20Sopenharmony_ci	return 0;
6448c2ecf20Sopenharmony_ci
6458c2ecf20Sopenharmony_ciout:
6468c2ecf20Sopenharmony_ci	vhost_vsock_free(vsock);
6478c2ecf20Sopenharmony_ci	return ret;
6488c2ecf20Sopenharmony_ci}
6498c2ecf20Sopenharmony_ci
6508c2ecf20Sopenharmony_cistatic void vhost_vsock_flush(struct vhost_vsock *vsock)
6518c2ecf20Sopenharmony_ci{
6528c2ecf20Sopenharmony_ci	int i;
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
6558c2ecf20Sopenharmony_ci		if (vsock->vqs[i].handle_kick)
6568c2ecf20Sopenharmony_ci			vhost_poll_flush(&vsock->vqs[i].poll);
6578c2ecf20Sopenharmony_ci	vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
6588c2ecf20Sopenharmony_ci}
6598c2ecf20Sopenharmony_ci
6608c2ecf20Sopenharmony_cistatic void vhost_vsock_reset_orphans(struct sock *sk)
6618c2ecf20Sopenharmony_ci{
6628c2ecf20Sopenharmony_ci	struct vsock_sock *vsk = vsock_sk(sk);
6638c2ecf20Sopenharmony_ci
6648c2ecf20Sopenharmony_ci	/* vmci_transport.c doesn't take sk_lock here either.  At least we're
6658c2ecf20Sopenharmony_ci	 * under vsock_table_lock so the sock cannot disappear while we're
6668c2ecf20Sopenharmony_ci	 * executing.
6678c2ecf20Sopenharmony_ci	 */
6688c2ecf20Sopenharmony_ci
6698c2ecf20Sopenharmony_ci	/* If the peer is still valid, no need to reset connection */
6708c2ecf20Sopenharmony_ci	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
6718c2ecf20Sopenharmony_ci		return;
6728c2ecf20Sopenharmony_ci
6738c2ecf20Sopenharmony_ci	/* If the close timeout is pending, let it expire.  This avoids races
6748c2ecf20Sopenharmony_ci	 * with the timeout callback.
6758c2ecf20Sopenharmony_ci	 */
6768c2ecf20Sopenharmony_ci	if (vsk->close_work_scheduled)
6778c2ecf20Sopenharmony_ci		return;
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_ci	sock_set_flag(sk, SOCK_DONE);
6808c2ecf20Sopenharmony_ci	vsk->peer_shutdown = SHUTDOWN_MASK;
6818c2ecf20Sopenharmony_ci	sk->sk_state = SS_UNCONNECTED;
6828c2ecf20Sopenharmony_ci	sk->sk_err = ECONNRESET;
6838c2ecf20Sopenharmony_ci	sk->sk_error_report(sk);
6848c2ecf20Sopenharmony_ci}
6858c2ecf20Sopenharmony_ci
6868c2ecf20Sopenharmony_cistatic int vhost_vsock_dev_release(struct inode *inode, struct file *file)
6878c2ecf20Sopenharmony_ci{
6888c2ecf20Sopenharmony_ci	struct vhost_vsock *vsock = file->private_data;
6898c2ecf20Sopenharmony_ci
6908c2ecf20Sopenharmony_ci	mutex_lock(&vhost_vsock_mutex);
6918c2ecf20Sopenharmony_ci	if (vsock->guest_cid)
6928c2ecf20Sopenharmony_ci		hash_del_rcu(&vsock->hash);
6938c2ecf20Sopenharmony_ci	mutex_unlock(&vhost_vsock_mutex);
6948c2ecf20Sopenharmony_ci
6958c2ecf20Sopenharmony_ci	/* Wait for other CPUs to finish using vsock */
6968c2ecf20Sopenharmony_ci	synchronize_rcu();
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_ci	/* Iterating over all connections for all CIDs to find orphans is
6998c2ecf20Sopenharmony_ci	 * inefficient.  Room for improvement here. */
7008c2ecf20Sopenharmony_ci	vsock_for_each_connected_socket(&vhost_transport.transport,
7018c2ecf20Sopenharmony_ci					vhost_vsock_reset_orphans);
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_ci	/* Don't check the owner, because we are in the release path, so we
7048c2ecf20Sopenharmony_ci	 * need to stop the vsock device in any case.
7058c2ecf20Sopenharmony_ci	 * vhost_vsock_stop() can not fail in this case, so we don't need to
7068c2ecf20Sopenharmony_ci	 * check the return code.
7078c2ecf20Sopenharmony_ci	 */
7088c2ecf20Sopenharmony_ci	vhost_vsock_stop(vsock, false);
7098c2ecf20Sopenharmony_ci	vhost_vsock_flush(vsock);
7108c2ecf20Sopenharmony_ci	vhost_dev_stop(&vsock->dev);
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_ci	spin_lock_bh(&vsock->send_pkt_list_lock);
7138c2ecf20Sopenharmony_ci	while (!list_empty(&vsock->send_pkt_list)) {
7148c2ecf20Sopenharmony_ci		struct virtio_vsock_pkt *pkt;
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_ci		pkt = list_first_entry(&vsock->send_pkt_list,
7178c2ecf20Sopenharmony_ci				struct virtio_vsock_pkt, list);
7188c2ecf20Sopenharmony_ci		list_del_init(&pkt->list);
7198c2ecf20Sopenharmony_ci		virtio_transport_free_pkt(pkt);
7208c2ecf20Sopenharmony_ci	}
7218c2ecf20Sopenharmony_ci	spin_unlock_bh(&vsock->send_pkt_list_lock);
7228c2ecf20Sopenharmony_ci
7238c2ecf20Sopenharmony_ci	vhost_dev_cleanup(&vsock->dev);
7248c2ecf20Sopenharmony_ci	kfree(vsock->dev.vqs);
7258c2ecf20Sopenharmony_ci	vhost_vsock_free(vsock);
7268c2ecf20Sopenharmony_ci	return 0;
7278c2ecf20Sopenharmony_ci}
7288c2ecf20Sopenharmony_ci
7298c2ecf20Sopenharmony_cistatic int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
7308c2ecf20Sopenharmony_ci{
7318c2ecf20Sopenharmony_ci	struct vhost_vsock *other;
7328c2ecf20Sopenharmony_ci
7338c2ecf20Sopenharmony_ci	/* Refuse reserved CIDs */
7348c2ecf20Sopenharmony_ci	if (guest_cid <= VMADDR_CID_HOST ||
7358c2ecf20Sopenharmony_ci	    guest_cid == U32_MAX)
7368c2ecf20Sopenharmony_ci		return -EINVAL;
7378c2ecf20Sopenharmony_ci
7388c2ecf20Sopenharmony_ci	/* 64-bit CIDs are not yet supported */
7398c2ecf20Sopenharmony_ci	if (guest_cid > U32_MAX)
7408c2ecf20Sopenharmony_ci		return -EINVAL;
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_ci	/* Refuse if CID is assigned to the guest->host transport (i.e. nested
7438c2ecf20Sopenharmony_ci	 * VM), to make the loopback work.
7448c2ecf20Sopenharmony_ci	 */
7458c2ecf20Sopenharmony_ci	if (vsock_find_cid(guest_cid))
7468c2ecf20Sopenharmony_ci		return -EADDRINUSE;
7478c2ecf20Sopenharmony_ci
7488c2ecf20Sopenharmony_ci	/* Refuse if CID is already in use */
7498c2ecf20Sopenharmony_ci	mutex_lock(&vhost_vsock_mutex);
7508c2ecf20Sopenharmony_ci	other = vhost_vsock_get(guest_cid);
7518c2ecf20Sopenharmony_ci	if (other && other != vsock) {
7528c2ecf20Sopenharmony_ci		mutex_unlock(&vhost_vsock_mutex);
7538c2ecf20Sopenharmony_ci		return -EADDRINUSE;
7548c2ecf20Sopenharmony_ci	}
7558c2ecf20Sopenharmony_ci
7568c2ecf20Sopenharmony_ci	if (vsock->guest_cid)
7578c2ecf20Sopenharmony_ci		hash_del_rcu(&vsock->hash);
7588c2ecf20Sopenharmony_ci
7598c2ecf20Sopenharmony_ci	vsock->guest_cid = guest_cid;
7608c2ecf20Sopenharmony_ci	hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
7618c2ecf20Sopenharmony_ci	mutex_unlock(&vhost_vsock_mutex);
7628c2ecf20Sopenharmony_ci
7638c2ecf20Sopenharmony_ci	return 0;
7648c2ecf20Sopenharmony_ci}
7658c2ecf20Sopenharmony_ci
7668c2ecf20Sopenharmony_cistatic int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
7678c2ecf20Sopenharmony_ci{
7688c2ecf20Sopenharmony_ci	struct vhost_virtqueue *vq;
7698c2ecf20Sopenharmony_ci	int i;
7708c2ecf20Sopenharmony_ci
7718c2ecf20Sopenharmony_ci	if (features & ~VHOST_VSOCK_FEATURES)
7728c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
7738c2ecf20Sopenharmony_ci
7748c2ecf20Sopenharmony_ci	mutex_lock(&vsock->dev.mutex);
7758c2ecf20Sopenharmony_ci	if ((features & (1 << VHOST_F_LOG_ALL)) &&
7768c2ecf20Sopenharmony_ci	    !vhost_log_access_ok(&vsock->dev)) {
7778c2ecf20Sopenharmony_ci		mutex_unlock(&vsock->dev.mutex);
7788c2ecf20Sopenharmony_ci		return -EFAULT;
7798c2ecf20Sopenharmony_ci	}
7808c2ecf20Sopenharmony_ci
7818c2ecf20Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
7828c2ecf20Sopenharmony_ci		vq = &vsock->vqs[i];
7838c2ecf20Sopenharmony_ci		mutex_lock(&vq->mutex);
7848c2ecf20Sopenharmony_ci		vq->acked_features = features;
7858c2ecf20Sopenharmony_ci		mutex_unlock(&vq->mutex);
7868c2ecf20Sopenharmony_ci	}
7878c2ecf20Sopenharmony_ci	mutex_unlock(&vsock->dev.mutex);
7888c2ecf20Sopenharmony_ci	return 0;
7898c2ecf20Sopenharmony_ci}
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_cistatic long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
7928c2ecf20Sopenharmony_ci				  unsigned long arg)
7938c2ecf20Sopenharmony_ci{
7948c2ecf20Sopenharmony_ci	struct vhost_vsock *vsock = f->private_data;
7958c2ecf20Sopenharmony_ci	void __user *argp = (void __user *)arg;
7968c2ecf20Sopenharmony_ci	u64 guest_cid;
7978c2ecf20Sopenharmony_ci	u64 features;
7988c2ecf20Sopenharmony_ci	int start;
7998c2ecf20Sopenharmony_ci	int r;
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci	switch (ioctl) {
8028c2ecf20Sopenharmony_ci	case VHOST_VSOCK_SET_GUEST_CID:
8038c2ecf20Sopenharmony_ci		if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
8048c2ecf20Sopenharmony_ci			return -EFAULT;
8058c2ecf20Sopenharmony_ci		return vhost_vsock_set_cid(vsock, guest_cid);
8068c2ecf20Sopenharmony_ci	case VHOST_VSOCK_SET_RUNNING:
8078c2ecf20Sopenharmony_ci		if (copy_from_user(&start, argp, sizeof(start)))
8088c2ecf20Sopenharmony_ci			return -EFAULT;
8098c2ecf20Sopenharmony_ci		if (start)
8108c2ecf20Sopenharmony_ci			return vhost_vsock_start(vsock);
8118c2ecf20Sopenharmony_ci		else
8128c2ecf20Sopenharmony_ci			return vhost_vsock_stop(vsock, true);
8138c2ecf20Sopenharmony_ci	case VHOST_GET_FEATURES:
8148c2ecf20Sopenharmony_ci		features = VHOST_VSOCK_FEATURES;
8158c2ecf20Sopenharmony_ci		if (copy_to_user(argp, &features, sizeof(features)))
8168c2ecf20Sopenharmony_ci			return -EFAULT;
8178c2ecf20Sopenharmony_ci		return 0;
8188c2ecf20Sopenharmony_ci	case VHOST_SET_FEATURES:
8198c2ecf20Sopenharmony_ci		if (copy_from_user(&features, argp, sizeof(features)))
8208c2ecf20Sopenharmony_ci			return -EFAULT;
8218c2ecf20Sopenharmony_ci		return vhost_vsock_set_features(vsock, features);
8228c2ecf20Sopenharmony_ci	default:
8238c2ecf20Sopenharmony_ci		mutex_lock(&vsock->dev.mutex);
8248c2ecf20Sopenharmony_ci		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
8258c2ecf20Sopenharmony_ci		if (r == -ENOIOCTLCMD)
8268c2ecf20Sopenharmony_ci			r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
8278c2ecf20Sopenharmony_ci		else
8288c2ecf20Sopenharmony_ci			vhost_vsock_flush(vsock);
8298c2ecf20Sopenharmony_ci		mutex_unlock(&vsock->dev.mutex);
8308c2ecf20Sopenharmony_ci		return r;
8318c2ecf20Sopenharmony_ci	}
8328c2ecf20Sopenharmony_ci}
8338c2ecf20Sopenharmony_ci
8348c2ecf20Sopenharmony_cistatic const struct file_operations vhost_vsock_fops = {
8358c2ecf20Sopenharmony_ci	.owner          = THIS_MODULE,
8368c2ecf20Sopenharmony_ci	.open           = vhost_vsock_dev_open,
8378c2ecf20Sopenharmony_ci	.release        = vhost_vsock_dev_release,
8388c2ecf20Sopenharmony_ci	.llseek		= noop_llseek,
8398c2ecf20Sopenharmony_ci	.unlocked_ioctl = vhost_vsock_dev_ioctl,
8408c2ecf20Sopenharmony_ci	.compat_ioctl   = compat_ptr_ioctl,
8418c2ecf20Sopenharmony_ci};
8428c2ecf20Sopenharmony_ci
8438c2ecf20Sopenharmony_cistatic struct miscdevice vhost_vsock_misc = {
8448c2ecf20Sopenharmony_ci	.minor = VHOST_VSOCK_MINOR,
8458c2ecf20Sopenharmony_ci	.name = "vhost-vsock",
8468c2ecf20Sopenharmony_ci	.fops = &vhost_vsock_fops,
8478c2ecf20Sopenharmony_ci};
8488c2ecf20Sopenharmony_ci
8498c2ecf20Sopenharmony_cistatic int __init vhost_vsock_init(void)
8508c2ecf20Sopenharmony_ci{
8518c2ecf20Sopenharmony_ci	int ret;
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_ci	ret = vsock_core_register(&vhost_transport.transport,
8548c2ecf20Sopenharmony_ci				  VSOCK_TRANSPORT_F_H2G);
8558c2ecf20Sopenharmony_ci	if (ret < 0)
8568c2ecf20Sopenharmony_ci		return ret;
8578c2ecf20Sopenharmony_ci
8588c2ecf20Sopenharmony_ci	ret = misc_register(&vhost_vsock_misc);
8598c2ecf20Sopenharmony_ci	if (ret) {
8608c2ecf20Sopenharmony_ci		vsock_core_unregister(&vhost_transport.transport);
8618c2ecf20Sopenharmony_ci		return ret;
8628c2ecf20Sopenharmony_ci	}
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_ci	return 0;
8658c2ecf20Sopenharmony_ci};
8668c2ecf20Sopenharmony_ci
8678c2ecf20Sopenharmony_cistatic void __exit vhost_vsock_exit(void)
8688c2ecf20Sopenharmony_ci{
8698c2ecf20Sopenharmony_ci	misc_deregister(&vhost_vsock_misc);
8708c2ecf20Sopenharmony_ci	vsock_core_unregister(&vhost_transport.transport);
8718c2ecf20Sopenharmony_ci};
8728c2ecf20Sopenharmony_ci
8738c2ecf20Sopenharmony_cimodule_init(vhost_vsock_init);
8748c2ecf20Sopenharmony_cimodule_exit(vhost_vsock_exit);
8758c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2");
8768c2ecf20Sopenharmony_ciMODULE_AUTHOR("Asias He");
8778c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("vhost transport for vsock ");
8788c2ecf20Sopenharmony_ciMODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
8798c2ecf20Sopenharmony_ciMODULE_ALIAS("devname:vhost-vsock");
880