18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Hyper-V transport for vsock
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Hyper-V Sockets supplies a byte-stream based communication mechanism
68c2ecf20Sopenharmony_ci * between the host and the VM. This driver implements the necessary
78c2ecf20Sopenharmony_ci * support in the VM by introducing the new vsock transport.
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * Copyright (c) 2017, Microsoft Corporation.
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci#include <linux/module.h>
128c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
138c2ecf20Sopenharmony_ci#include <linux/hyperv.h>
148c2ecf20Sopenharmony_ci#include <net/sock.h>
158c2ecf20Sopenharmony_ci#include <net/af_vsock.h>
168c2ecf20Sopenharmony_ci#include <asm/hyperv-tlfs.h>
178c2ecf20Sopenharmony_ci
/* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some
 * stricter requirements on the hv_sock ring buffer size of six 4K pages.
 * hyperv-tlfs defines HV_HYP_PAGE_SIZE as 4K. Newer hosts don't have this
 * limitation; but, keep the defaults the same for compat.
 *
 * RINGBUFFER_HVS_MAX_SIZE is the upper bound applied to the socket buffer
 * size hints when the ring buffers are sized in hvs_open_connection().
 */
#define RINGBUFFER_HVS_RCV_SIZE (HV_HYP_PAGE_SIZE * 6)
#define RINGBUFFER_HVS_SND_SIZE (HV_HYP_PAGE_SIZE * 6)
#define RINGBUFFER_HVS_MAX_SIZE (HV_HYP_PAGE_SIZE * 64)

/* The MTU is 16KB per the host side's design. Received packets announcing a
 * larger payload are rejected in hvs_update_recv_data().
 */
#define HVS_MTU_SIZE		(1024 * 16)

/* How long to wait for graceful shutdown of a connection (in jiffies) */
#define HVS_CLOSE_TIMEOUT (8 * HZ)
328c2ecf20Sopenharmony_ci
/* Header placed in front of the payload of every hv_sock packet. */
struct vmpipe_proto_header {
	/* Set to 1 for the data packets sent by hvs_send_data() */
	u32 pkt_type;
	/* Payload length in bytes; a value of 0 is treated as FIN */
	u32 data_size;
};
378c2ecf20Sopenharmony_ci
/* For recv, we use the VMBus in-place packet iterator APIs to directly copy
 * data from the ringbuffer into the userspace buffer.
 *
 * This struct describes the layout that follows the VMBus packet descriptor
 * of a received packet: it is overlaid on 'recv_desc + 1'.
 */
struct hvs_recv_buf {
	/* The header before the payload data */
	struct vmpipe_proto_header hdr;

	/* The payload, at most HVS_MTU_SIZE bytes */
	u8 data[HVS_MTU_SIZE];
};
488c2ecf20Sopenharmony_ci
/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
 * a smaller size, i.e. HVS_SEND_BUF_SIZE, to maximize concurrency between the
 * guest and the host processing as one VMBUS packet is the smallest processing
 * unit.
 *
 * HVS_SEND_BUF_SIZE is chosen so that header plus payload fill exactly one
 * HV_HYP_PAGE_SIZE page.
 *
 * Note: the buffer can be eliminated in the future when we add new VMBus
 * ringbuffer APIs that allow us to directly copy data from userspace buffer
 * to VMBus ringbuffer.
 */
#define HVS_SEND_BUF_SIZE \
		(HV_HYP_PAGE_SIZE - sizeof(struct vmpipe_proto_header))

/* Staging buffer for one outgoing packet: header followed by payload. */
struct hvs_send_buf {
	/* The header before the payload data */
	struct vmpipe_proto_header hdr;

	/* The payload */
	u8 data[HVS_SEND_BUF_SIZE];
};
688c2ecf20Sopenharmony_ci
/* Bytes that precede the payload in the ring buffer: the VMBus packet
 * descriptor followed by the hv_sock protocol header.
 */
#define HVS_HEADER_LEN	(sizeof(struct vmpacket_descriptor) + \
			 sizeof(struct vmpipe_proto_header))

/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and
 * __hv_pkt_iter_next().
 */
#define VMBUS_PKT_TRAILER_SIZE	(sizeof(u64))

/* Ring buffer space taken by a packet carrying 'payload_len' bytes: headers,
 * the payload rounded up to a multiple of 8 bytes, and the trailer.
 */
#define HVS_PKT_LEN(payload_len)	(HVS_HEADER_LEN + \
					 ALIGN((payload_len), 8) + \
					 VMBUS_PKT_TRAILER_SIZE)
808c2ecf20Sopenharmony_ci
/* Two views of a service GUID: the GUID as a whole, or the vsock port that
 * overlays its leading bytes followed by the remaining GUID bytes.
 */
union hvs_service_id {
	guid_t	srv_id;

	struct {
		/* The AF_VSOCK port stored at the start of the GUID */
		unsigned int svm_port;
		/* The rest of the GUID */
		unsigned char b[sizeof(guid_t) - sizeof(unsigned int)];
	};
};
898c2ecf20Sopenharmony_ci
/* Per-socket state (accessed via vsk->trans) */
struct hvsock {
	/* Back pointer to the owning vsock socket */
	struct vsock_sock *vsk;

	/* Service GUIDs of the VM side and the host side of the connection */
	guid_t vm_srv_id;
	guid_t host_srv_id;

	/* The VMBus channel backing the connection; NULL until one is
	 * attached in hvs_open_connection().
	 */
	struct vmbus_channel *chan;
	/* Descriptor of the received packet currently being consumed; NULL
	 * means the next read has to fetch a packet from the ring buffer.
	 */
	struct vmpacket_descriptor *recv_desc;

	/* The length of the payload not delivered to userland yet */
	u32 recv_data_len;
	/* The offset of the payload */
	u32 recv_data_off;

	/* Have we sent the zero-length packet (FIN)? */
	bool fin_sent;
};
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is
1108c2ecf20Sopenharmony_ci * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here:
1118c2ecf20Sopenharmony_ci * when we write apps to connect to the host, we can only use VMADDR_CID_ANY
1128c2ecf20Sopenharmony_ci * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we
1138c2ecf20Sopenharmony_ci * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY
1148c2ecf20Sopenharmony_ci * as the local cid.
1158c2ecf20Sopenharmony_ci *
1168c2ecf20Sopenharmony_ci * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
1178c2ecf20Sopenharmony_ci * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
1188c2ecf20Sopenharmony_ci * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
1198c2ecf20Sopenharmony_ci * the below sockaddr:
1208c2ecf20Sopenharmony_ci *
1218c2ecf20Sopenharmony_ci * struct SOCKADDR_HV
1228c2ecf20Sopenharmony_ci * {
1238c2ecf20Sopenharmony_ci *    ADDRESS_FAMILY Family;
1248c2ecf20Sopenharmony_ci *    USHORT Reserved;
1258c2ecf20Sopenharmony_ci *    GUID VmId;
1268c2ecf20Sopenharmony_ci *    GUID ServiceId;
1278c2ecf20Sopenharmony_ci * };
1288c2ecf20Sopenharmony_ci * Note: VmID is not used by Linux VM and actually it isn't transmitted via
1298c2ecf20Sopenharmony_ci * VMBus, because here it's obvious the host and the VM can easily identify
1308c2ecf20Sopenharmony_ci * each other. Though the VmID is useful on the host, especially in the case
1318c2ecf20Sopenharmony_ci * of Windows container, Linux VM doesn't need it at all.
1328c2ecf20Sopenharmony_ci *
1338c2ecf20Sopenharmony_ci * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit
1348c2ecf20Sopenharmony_ci * the available GUID space of SOCKADDR_HV so that we can create a mapping
1358c2ecf20Sopenharmony_ci * between AF_VSOCK port and SOCKADDR_HV Service GUID. The rule of writing
1368c2ecf20Sopenharmony_ci * Hyper-V Sockets apps on the host and in Linux VM is:
1378c2ecf20Sopenharmony_ci *
1388c2ecf20Sopenharmony_ci ****************************************************************************
1398c2ecf20Sopenharmony_ci * The only valid Service GUIDs, from the perspectives of both the host and *
1408c2ecf20Sopenharmony_ci * Linux VM, that can be connected by the other end, must conform to this   *
1418c2ecf20Sopenharmony_ci * format: <port>-facb-11e6-bd58-64006a7986d3.                              *
1428c2ecf20Sopenharmony_ci ****************************************************************************
1438c2ecf20Sopenharmony_ci *
1448c2ecf20Sopenharmony_ci * When we write apps on the host to connect(), the GUID ServiceID is used.
1458c2ecf20Sopenharmony_ci * When we write apps in Linux VM to connect(), we only need to specify the
1468c2ecf20Sopenharmony_ci * port and the driver will form the GUID and use that to request the host.
1478c2ecf20Sopenharmony_ci *
1488c2ecf20Sopenharmony_ci */
1498c2ecf20Sopenharmony_ci
/* 00000000-facb-11e6-bd58-64006a7986d3
 *
 * Template for every valid service GUID: the leading 00000000 is replaced by
 * the vsock port, the remaining bytes are fixed.
 */
static const guid_t srv_id_template =
	GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
		  0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);

/* Forward declaration: hvs_open_connection() calls this before it is
 * defined.
 */
static bool hvs_check_transport(struct vsock_sock *vsk);
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_cistatic bool is_valid_srv_id(const guid_t *id)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(guid_t) - 4);
1608c2ecf20Sopenharmony_ci}
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_cistatic unsigned int get_port_by_srv_id(const guid_t *svr_id)
1638c2ecf20Sopenharmony_ci{
1648c2ecf20Sopenharmony_ci	return *((unsigned int *)svr_id);
1658c2ecf20Sopenharmony_ci}
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_cistatic void hvs_addr_init(struct sockaddr_vm *addr, const guid_t *svr_id)
1688c2ecf20Sopenharmony_ci{
1698c2ecf20Sopenharmony_ci	unsigned int port = get_port_by_srv_id(svr_id);
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	vsock_addr_init(addr, VMADDR_CID_ANY, port);
1728c2ecf20Sopenharmony_ci}
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_cistatic void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
1758c2ecf20Sopenharmony_ci{
1768c2ecf20Sopenharmony_ci	set_channel_pending_send_size(chan,
1778c2ecf20Sopenharmony_ci				      HVS_PKT_LEN(HVS_SEND_BUF_SIZE));
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci	virt_mb();
1808c2ecf20Sopenharmony_ci}
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_cistatic bool hvs_channel_readable(struct vmbus_channel *chan)
1838c2ecf20Sopenharmony_ci{
1848c2ecf20Sopenharmony_ci	u32 readable = hv_get_bytes_to_read(&chan->inbound);
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci	/* 0-size payload means FIN */
1878c2ecf20Sopenharmony_ci	return readable >= HVS_PKT_LEN(0);
1888c2ecf20Sopenharmony_ci}
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_cistatic int hvs_channel_readable_payload(struct vmbus_channel *chan)
1918c2ecf20Sopenharmony_ci{
1928c2ecf20Sopenharmony_ci	u32 readable = hv_get_bytes_to_read(&chan->inbound);
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	if (readable > HVS_PKT_LEN(0)) {
1958c2ecf20Sopenharmony_ci		/* At least we have 1 byte to read. We don't need to return
1968c2ecf20Sopenharmony_ci		 * the exact readable bytes: see vsock_stream_recvmsg() ->
1978c2ecf20Sopenharmony_ci		 * vsock_stream_has_data().
1988c2ecf20Sopenharmony_ci		 */
1998c2ecf20Sopenharmony_ci		return 1;
2008c2ecf20Sopenharmony_ci	}
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_ci	if (readable == HVS_PKT_LEN(0)) {
2038c2ecf20Sopenharmony_ci		/* 0-size payload means FIN */
2048c2ecf20Sopenharmony_ci		return 0;
2058c2ecf20Sopenharmony_ci	}
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	/* No payload or FIN */
2088c2ecf20Sopenharmony_ci	return -1;
2098c2ecf20Sopenharmony_ci}
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_cistatic size_t hvs_channel_writable_bytes(struct vmbus_channel *chan)
2128c2ecf20Sopenharmony_ci{
2138c2ecf20Sopenharmony_ci	u32 writeable = hv_get_bytes_to_write(&chan->outbound);
2148c2ecf20Sopenharmony_ci	size_t ret;
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_ci	/* The ringbuffer mustn't be 100% full, and we should reserve a
2178c2ecf20Sopenharmony_ci	 * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
2188c2ecf20Sopenharmony_ci	 * and hvs_shutdown().
2198c2ecf20Sopenharmony_ci	 */
2208c2ecf20Sopenharmony_ci	if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0))
2218c2ecf20Sopenharmony_ci		return 0;
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci	ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0);
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci	return round_down(ret, 8);
2268c2ecf20Sopenharmony_ci}
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_cistatic int hvs_send_data(struct vmbus_channel *chan,
2298c2ecf20Sopenharmony_ci			 struct hvs_send_buf *send_buf, size_t to_write)
2308c2ecf20Sopenharmony_ci{
2318c2ecf20Sopenharmony_ci	send_buf->hdr.pkt_type = 1;
2328c2ecf20Sopenharmony_ci	send_buf->hdr.data_size = to_write;
2338c2ecf20Sopenharmony_ci	return vmbus_sendpacket(chan, &send_buf->hdr,
2348c2ecf20Sopenharmony_ci				sizeof(send_buf->hdr) + to_write,
2358c2ecf20Sopenharmony_ci				0, VM_PKT_DATA_INBAND, 0);
2368c2ecf20Sopenharmony_ci}
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_cistatic void hvs_channel_cb(void *ctx)
2398c2ecf20Sopenharmony_ci{
2408c2ecf20Sopenharmony_ci	struct sock *sk = (struct sock *)ctx;
2418c2ecf20Sopenharmony_ci	struct vsock_sock *vsk = vsock_sk(sk);
2428c2ecf20Sopenharmony_ci	struct hvsock *hvs = vsk->trans;
2438c2ecf20Sopenharmony_ci	struct vmbus_channel *chan = hvs->chan;
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci	if (hvs_channel_readable(chan))
2468c2ecf20Sopenharmony_ci		sk->sk_data_ready(sk);
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci	if (hv_get_bytes_to_write(&chan->outbound) > 0)
2498c2ecf20Sopenharmony_ci		sk->sk_write_space(sk);
2508c2ecf20Sopenharmony_ci}
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_cistatic void hvs_do_close_lock_held(struct vsock_sock *vsk,
2538c2ecf20Sopenharmony_ci				   bool cancel_timeout)
2548c2ecf20Sopenharmony_ci{
2558c2ecf20Sopenharmony_ci	struct sock *sk = sk_vsock(vsk);
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	sock_set_flag(sk, SOCK_DONE);
2588c2ecf20Sopenharmony_ci	vsk->peer_shutdown = SHUTDOWN_MASK;
2598c2ecf20Sopenharmony_ci	if (vsock_stream_has_data(vsk) <= 0)
2608c2ecf20Sopenharmony_ci		sk->sk_state = TCP_CLOSING;
2618c2ecf20Sopenharmony_ci	sk->sk_state_change(sk);
2628c2ecf20Sopenharmony_ci	if (vsk->close_work_scheduled &&
2638c2ecf20Sopenharmony_ci	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
2648c2ecf20Sopenharmony_ci		vsk->close_work_scheduled = false;
2658c2ecf20Sopenharmony_ci		vsock_remove_sock(vsk);
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci		/* Release the reference taken while scheduling the timeout */
2688c2ecf20Sopenharmony_ci		sock_put(sk);
2698c2ecf20Sopenharmony_ci	}
2708c2ecf20Sopenharmony_ci}
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_cistatic void hvs_close_connection(struct vmbus_channel *chan)
2738c2ecf20Sopenharmony_ci{
2748c2ecf20Sopenharmony_ci	struct sock *sk = get_per_channel_state(chan);
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_ci	lock_sock(sk);
2778c2ecf20Sopenharmony_ci	hvs_do_close_lock_held(vsock_sk(sk), true);
2788c2ecf20Sopenharmony_ci	release_sock(sk);
2798c2ecf20Sopenharmony_ci
2808c2ecf20Sopenharmony_ci	/* Release the refcnt for the channel that's opened in
2818c2ecf20Sopenharmony_ci	 * hvs_open_connection().
2828c2ecf20Sopenharmony_ci	 */
2838c2ecf20Sopenharmony_ci	sock_put(sk);
2848c2ecf20Sopenharmony_ci}
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_cistatic void hvs_open_connection(struct vmbus_channel *chan)
2878c2ecf20Sopenharmony_ci{
2888c2ecf20Sopenharmony_ci	guid_t *if_instance, *if_type;
2898c2ecf20Sopenharmony_ci	unsigned char conn_from_host;
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	struct sockaddr_vm addr;
2928c2ecf20Sopenharmony_ci	struct sock *sk, *new = NULL;
2938c2ecf20Sopenharmony_ci	struct vsock_sock *vnew = NULL;
2948c2ecf20Sopenharmony_ci	struct hvsock *hvs = NULL;
2958c2ecf20Sopenharmony_ci	struct hvsock *hvs_new = NULL;
2968c2ecf20Sopenharmony_ci	int rcvbuf;
2978c2ecf20Sopenharmony_ci	int ret;
2988c2ecf20Sopenharmony_ci	int sndbuf;
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci	if_type = &chan->offermsg.offer.if_type;
3018c2ecf20Sopenharmony_ci	if_instance = &chan->offermsg.offer.if_instance;
3028c2ecf20Sopenharmony_ci	conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];
3038c2ecf20Sopenharmony_ci	if (!is_valid_srv_id(if_type))
3048c2ecf20Sopenharmony_ci		return;
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_ci	hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
3078c2ecf20Sopenharmony_ci	sk = vsock_find_bound_socket(&addr);
3088c2ecf20Sopenharmony_ci	if (!sk)
3098c2ecf20Sopenharmony_ci		return;
3108c2ecf20Sopenharmony_ci
3118c2ecf20Sopenharmony_ci	lock_sock(sk);
3128c2ecf20Sopenharmony_ci	if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
3138c2ecf20Sopenharmony_ci	    (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
3148c2ecf20Sopenharmony_ci		goto out;
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci	if (conn_from_host) {
3178c2ecf20Sopenharmony_ci		if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
3188c2ecf20Sopenharmony_ci			goto out;
3198c2ecf20Sopenharmony_ci
3208c2ecf20Sopenharmony_ci		new = vsock_create_connected(sk);
3218c2ecf20Sopenharmony_ci		if (!new)
3228c2ecf20Sopenharmony_ci			goto out;
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_ci		new->sk_state = TCP_SYN_SENT;
3258c2ecf20Sopenharmony_ci		vnew = vsock_sk(new);
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci		hvs_addr_init(&vnew->local_addr, if_type);
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ci		/* Remote peer is always the host */
3308c2ecf20Sopenharmony_ci		vsock_addr_init(&vnew->remote_addr,
3318c2ecf20Sopenharmony_ci				VMADDR_CID_HOST, VMADDR_PORT_ANY);
3328c2ecf20Sopenharmony_ci		vnew->remote_addr.svm_port = get_port_by_srv_id(if_instance);
3338c2ecf20Sopenharmony_ci		ret = vsock_assign_transport(vnew, vsock_sk(sk));
3348c2ecf20Sopenharmony_ci		/* Transport assigned (looking at remote_addr) must be the
3358c2ecf20Sopenharmony_ci		 * same where we received the request.
3368c2ecf20Sopenharmony_ci		 */
3378c2ecf20Sopenharmony_ci		if (ret || !hvs_check_transport(vnew)) {
3388c2ecf20Sopenharmony_ci			sock_put(new);
3398c2ecf20Sopenharmony_ci			goto out;
3408c2ecf20Sopenharmony_ci		}
3418c2ecf20Sopenharmony_ci		hvs_new = vnew->trans;
3428c2ecf20Sopenharmony_ci		hvs_new->chan = chan;
3438c2ecf20Sopenharmony_ci	} else {
3448c2ecf20Sopenharmony_ci		hvs = vsock_sk(sk)->trans;
3458c2ecf20Sopenharmony_ci		hvs->chan = chan;
3468c2ecf20Sopenharmony_ci	}
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci	set_channel_read_mode(chan, HV_CALL_DIRECT);
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	/* Use the socket buffer sizes as hints for the VMBUS ring size. For
3518c2ecf20Sopenharmony_ci	 * server side sockets, 'sk' is the parent socket and thus, this will
3528c2ecf20Sopenharmony_ci	 * allow the child sockets to inherit the size from the parent. Keep
3538c2ecf20Sopenharmony_ci	 * the mins to the default value and align to page size as per VMBUS
3548c2ecf20Sopenharmony_ci	 * requirements.
3558c2ecf20Sopenharmony_ci	 * For the max, the socket core library will limit the socket buffer
3568c2ecf20Sopenharmony_ci	 * size that can be set by the user, but, since currently, the hv_sock
3578c2ecf20Sopenharmony_ci	 * VMBUS ring buffer is physically contiguous allocation, restrict it
3588c2ecf20Sopenharmony_ci	 * further.
3598c2ecf20Sopenharmony_ci	 * Older versions of hv_sock host side code cannot handle bigger VMBUS
3608c2ecf20Sopenharmony_ci	 * ring buffer size. Use the version number to limit the change to newer
3618c2ecf20Sopenharmony_ci	 * versions.
3628c2ecf20Sopenharmony_ci	 */
3638c2ecf20Sopenharmony_ci	if (vmbus_proto_version < VERSION_WIN10_V5) {
3648c2ecf20Sopenharmony_ci		sndbuf = RINGBUFFER_HVS_SND_SIZE;
3658c2ecf20Sopenharmony_ci		rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
3668c2ecf20Sopenharmony_ci	} else {
3678c2ecf20Sopenharmony_ci		sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
3688c2ecf20Sopenharmony_ci		sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
3698c2ecf20Sopenharmony_ci		sndbuf = ALIGN(sndbuf, HV_HYP_PAGE_SIZE);
3708c2ecf20Sopenharmony_ci		rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
3718c2ecf20Sopenharmony_ci		rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
3728c2ecf20Sopenharmony_ci		rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE);
3738c2ecf20Sopenharmony_ci	}
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci	ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
3768c2ecf20Sopenharmony_ci			 conn_from_host ? new : sk);
3778c2ecf20Sopenharmony_ci	if (ret != 0) {
3788c2ecf20Sopenharmony_ci		if (conn_from_host) {
3798c2ecf20Sopenharmony_ci			hvs_new->chan = NULL;
3808c2ecf20Sopenharmony_ci			sock_put(new);
3818c2ecf20Sopenharmony_ci		} else {
3828c2ecf20Sopenharmony_ci			hvs->chan = NULL;
3838c2ecf20Sopenharmony_ci		}
3848c2ecf20Sopenharmony_ci		goto out;
3858c2ecf20Sopenharmony_ci	}
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_ci	set_per_channel_state(chan, conn_from_host ? new : sk);
3888c2ecf20Sopenharmony_ci
3898c2ecf20Sopenharmony_ci	/* This reference will be dropped by hvs_close_connection(). */
3908c2ecf20Sopenharmony_ci	sock_hold(conn_from_host ? new : sk);
3918c2ecf20Sopenharmony_ci	vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
3928c2ecf20Sopenharmony_ci
3938c2ecf20Sopenharmony_ci	/* Set the pending send size to max packet size to always get
3948c2ecf20Sopenharmony_ci	 * notifications from the host when there is enough writable space.
3958c2ecf20Sopenharmony_ci	 * The host is optimized to send notifications only when the pending
3968c2ecf20Sopenharmony_ci	 * size boundary is crossed, and not always.
3978c2ecf20Sopenharmony_ci	 */
3988c2ecf20Sopenharmony_ci	hvs_set_channel_pending_send_size(chan);
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	if (conn_from_host) {
4018c2ecf20Sopenharmony_ci		new->sk_state = TCP_ESTABLISHED;
4028c2ecf20Sopenharmony_ci		sk_acceptq_added(sk);
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci		hvs_new->vm_srv_id = *if_type;
4058c2ecf20Sopenharmony_ci		hvs_new->host_srv_id = *if_instance;
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_ci		vsock_insert_connected(vnew);
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci		vsock_enqueue_accept(sk, new);
4108c2ecf20Sopenharmony_ci	} else {
4118c2ecf20Sopenharmony_ci		sk->sk_state = TCP_ESTABLISHED;
4128c2ecf20Sopenharmony_ci		sk->sk_socket->state = SS_CONNECTED;
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci		vsock_insert_connected(vsock_sk(sk));
4158c2ecf20Sopenharmony_ci	}
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci	sk->sk_state_change(sk);
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ciout:
4208c2ecf20Sopenharmony_ci	/* Release refcnt obtained when we called vsock_find_bound_socket() */
4218c2ecf20Sopenharmony_ci	sock_put(sk);
4228c2ecf20Sopenharmony_ci
4238c2ecf20Sopenharmony_ci	release_sock(sk);
4248c2ecf20Sopenharmony_ci}
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_cistatic u32 hvs_get_local_cid(void)
4278c2ecf20Sopenharmony_ci{
4288c2ecf20Sopenharmony_ci	return VMADDR_CID_ANY;
4298c2ecf20Sopenharmony_ci}
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_cistatic int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
4328c2ecf20Sopenharmony_ci{
4338c2ecf20Sopenharmony_ci	struct hvsock *hvs;
4348c2ecf20Sopenharmony_ci	struct sock *sk = sk_vsock(vsk);
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_ci	hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
4378c2ecf20Sopenharmony_ci	if (!hvs)
4388c2ecf20Sopenharmony_ci		return -ENOMEM;
4398c2ecf20Sopenharmony_ci
4408c2ecf20Sopenharmony_ci	vsk->trans = hvs;
4418c2ecf20Sopenharmony_ci	hvs->vsk = vsk;
4428c2ecf20Sopenharmony_ci	sk->sk_sndbuf = RINGBUFFER_HVS_SND_SIZE;
4438c2ecf20Sopenharmony_ci	sk->sk_rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
4448c2ecf20Sopenharmony_ci	return 0;
4458c2ecf20Sopenharmony_ci}
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_cistatic int hvs_connect(struct vsock_sock *vsk)
4488c2ecf20Sopenharmony_ci{
4498c2ecf20Sopenharmony_ci	union hvs_service_id vm, host;
4508c2ecf20Sopenharmony_ci	struct hvsock *h = vsk->trans;
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci	vm.srv_id = srv_id_template;
4538c2ecf20Sopenharmony_ci	vm.svm_port = vsk->local_addr.svm_port;
4548c2ecf20Sopenharmony_ci	h->vm_srv_id = vm.srv_id;
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_ci	host.srv_id = srv_id_template;
4578c2ecf20Sopenharmony_ci	host.svm_port = vsk->remote_addr.svm_port;
4588c2ecf20Sopenharmony_ci	h->host_srv_id = host.srv_id;
4598c2ecf20Sopenharmony_ci
4608c2ecf20Sopenharmony_ci	return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
4618c2ecf20Sopenharmony_ci}
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_cistatic void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
4648c2ecf20Sopenharmony_ci{
4658c2ecf20Sopenharmony_ci	struct vmpipe_proto_header hdr;
4668c2ecf20Sopenharmony_ci
4678c2ecf20Sopenharmony_ci	if (hvs->fin_sent || !hvs->chan)
4688c2ecf20Sopenharmony_ci		return;
4698c2ecf20Sopenharmony_ci
4708c2ecf20Sopenharmony_ci	/* It can't fail: see hvs_channel_writable_bytes(). */
4718c2ecf20Sopenharmony_ci	(void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0);
4728c2ecf20Sopenharmony_ci	hvs->fin_sent = true;
4738c2ecf20Sopenharmony_ci}
4748c2ecf20Sopenharmony_ci
4758c2ecf20Sopenharmony_cistatic int hvs_shutdown(struct vsock_sock *vsk, int mode)
4768c2ecf20Sopenharmony_ci{
4778c2ecf20Sopenharmony_ci	if (!(mode & SEND_SHUTDOWN))
4788c2ecf20Sopenharmony_ci		return 0;
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci	hvs_shutdown_lock_held(vsk->trans, mode);
4818c2ecf20Sopenharmony_ci	return 0;
4828c2ecf20Sopenharmony_ci}
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_cistatic void hvs_close_timeout(struct work_struct *work)
4858c2ecf20Sopenharmony_ci{
4868c2ecf20Sopenharmony_ci	struct vsock_sock *vsk =
4878c2ecf20Sopenharmony_ci		container_of(work, struct vsock_sock, close_work.work);
4888c2ecf20Sopenharmony_ci	struct sock *sk = sk_vsock(vsk);
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_ci	sock_hold(sk);
4918c2ecf20Sopenharmony_ci	lock_sock(sk);
4928c2ecf20Sopenharmony_ci	if (!sock_flag(sk, SOCK_DONE))
4938c2ecf20Sopenharmony_ci		hvs_do_close_lock_held(vsk, false);
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci	vsk->close_work_scheduled = false;
4968c2ecf20Sopenharmony_ci	release_sock(sk);
4978c2ecf20Sopenharmony_ci	sock_put(sk);
4988c2ecf20Sopenharmony_ci}
4998c2ecf20Sopenharmony_ci
5008c2ecf20Sopenharmony_ci/* Returns true, if it is safe to remove socket; false otherwise */
5018c2ecf20Sopenharmony_cistatic bool hvs_close_lock_held(struct vsock_sock *vsk)
5028c2ecf20Sopenharmony_ci{
5038c2ecf20Sopenharmony_ci	struct sock *sk = sk_vsock(vsk);
5048c2ecf20Sopenharmony_ci
5058c2ecf20Sopenharmony_ci	if (!(sk->sk_state == TCP_ESTABLISHED ||
5068c2ecf20Sopenharmony_ci	      sk->sk_state == TCP_CLOSING))
5078c2ecf20Sopenharmony_ci		return true;
5088c2ecf20Sopenharmony_ci
5098c2ecf20Sopenharmony_ci	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
5108c2ecf20Sopenharmony_ci		hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK);
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_ci	if (sock_flag(sk, SOCK_DONE))
5138c2ecf20Sopenharmony_ci		return true;
5148c2ecf20Sopenharmony_ci
5158c2ecf20Sopenharmony_ci	/* This reference will be dropped by the delayed close routine */
5168c2ecf20Sopenharmony_ci	sock_hold(sk);
5178c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout);
5188c2ecf20Sopenharmony_ci	vsk->close_work_scheduled = true;
5198c2ecf20Sopenharmony_ci	schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT);
5208c2ecf20Sopenharmony_ci	return false;
5218c2ecf20Sopenharmony_ci}
5228c2ecf20Sopenharmony_ci
/* Close the socket and remove it immediately when hvs_close_lock_held()
 * reports that this is safe.
 */
static void hvs_release(struct vsock_sock *vsk)
{
	if (hvs_close_lock_held(vsk))
		vsock_remove_sock(vsk);
}
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_cistatic void hvs_destruct(struct vsock_sock *vsk)
5338c2ecf20Sopenharmony_ci{
5348c2ecf20Sopenharmony_ci	struct hvsock *hvs = vsk->trans;
5358c2ecf20Sopenharmony_ci	struct vmbus_channel *chan = hvs->chan;
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_ci	if (chan)
5388c2ecf20Sopenharmony_ci		vmbus_hvsock_device_unregister(chan);
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_ci	kfree(hvs);
5418c2ecf20Sopenharmony_ci}
5428c2ecf20Sopenharmony_ci
/* Datagram bind is not supported: always fails with -EOPNOTSUPP. */
static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
{
	return -EOPNOTSUPP;
}
5478c2ecf20Sopenharmony_ci
/* Datagram receive is not supported: always fails with -EOPNOTSUPP. */
static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
			     size_t len, int flags)
{
	return -EOPNOTSUPP;
}
5538c2ecf20Sopenharmony_ci
/* Datagram send is not supported: always fails with -EOPNOTSUPP. */
static int hvs_dgram_enqueue(struct vsock_sock *vsk,
			     struct sockaddr_vm *remote, struct msghdr *msg,
			     size_t dgram_len)
{
	return -EOPNOTSUPP;
}
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_cistatic bool hvs_dgram_allow(u32 cid, u32 port)
5628c2ecf20Sopenharmony_ci{
5638c2ecf20Sopenharmony_ci	return false;
5648c2ecf20Sopenharmony_ci}
5658c2ecf20Sopenharmony_ci
5668c2ecf20Sopenharmony_cistatic int hvs_update_recv_data(struct hvsock *hvs)
5678c2ecf20Sopenharmony_ci{
5688c2ecf20Sopenharmony_ci	struct hvs_recv_buf *recv_buf;
5698c2ecf20Sopenharmony_ci	u32 payload_len;
5708c2ecf20Sopenharmony_ci
5718c2ecf20Sopenharmony_ci	recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
5728c2ecf20Sopenharmony_ci	payload_len = recv_buf->hdr.data_size;
5738c2ecf20Sopenharmony_ci
5748c2ecf20Sopenharmony_ci	if (payload_len > HVS_MTU_SIZE)
5758c2ecf20Sopenharmony_ci		return -EIO;
5768c2ecf20Sopenharmony_ci
5778c2ecf20Sopenharmony_ci	if (payload_len == 0)
5788c2ecf20Sopenharmony_ci		hvs->vsk->peer_shutdown |= SEND_SHUTDOWN;
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_ci	hvs->recv_data_len = payload_len;
5818c2ecf20Sopenharmony_ci	hvs->recv_data_off = 0;
5828c2ecf20Sopenharmony_ci
5838c2ecf20Sopenharmony_ci	return 0;
5848c2ecf20Sopenharmony_ci}
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_cistatic ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
5878c2ecf20Sopenharmony_ci				  size_t len, int flags)
5888c2ecf20Sopenharmony_ci{
5898c2ecf20Sopenharmony_ci	struct hvsock *hvs = vsk->trans;
5908c2ecf20Sopenharmony_ci	bool need_refill = !hvs->recv_desc;
5918c2ecf20Sopenharmony_ci	struct hvs_recv_buf *recv_buf;
5928c2ecf20Sopenharmony_ci	u32 to_read;
5938c2ecf20Sopenharmony_ci	int ret;
5948c2ecf20Sopenharmony_ci
5958c2ecf20Sopenharmony_ci	if (flags & MSG_PEEK)
5968c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci	if (need_refill) {
5998c2ecf20Sopenharmony_ci		hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
6008c2ecf20Sopenharmony_ci		ret = hvs_update_recv_data(hvs);
6018c2ecf20Sopenharmony_ci		if (ret)
6028c2ecf20Sopenharmony_ci			return ret;
6038c2ecf20Sopenharmony_ci	}
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci	recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
6068c2ecf20Sopenharmony_ci	to_read = min_t(u32, len, hvs->recv_data_len);
6078c2ecf20Sopenharmony_ci	ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read);
6088c2ecf20Sopenharmony_ci	if (ret != 0)
6098c2ecf20Sopenharmony_ci		return ret;
6108c2ecf20Sopenharmony_ci
6118c2ecf20Sopenharmony_ci	hvs->recv_data_len -= to_read;
6128c2ecf20Sopenharmony_ci	if (hvs->recv_data_len == 0) {
6138c2ecf20Sopenharmony_ci		hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
6148c2ecf20Sopenharmony_ci		if (hvs->recv_desc) {
6158c2ecf20Sopenharmony_ci			ret = hvs_update_recv_data(hvs);
6168c2ecf20Sopenharmony_ci			if (ret)
6178c2ecf20Sopenharmony_ci				return ret;
6188c2ecf20Sopenharmony_ci		}
6198c2ecf20Sopenharmony_ci	} else {
6208c2ecf20Sopenharmony_ci		hvs->recv_data_off += to_read;
6218c2ecf20Sopenharmony_ci	}
6228c2ecf20Sopenharmony_ci
6238c2ecf20Sopenharmony_ci	return to_read;
6248c2ecf20Sopenharmony_ci}
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_cistatic ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
6278c2ecf20Sopenharmony_ci				  size_t len)
6288c2ecf20Sopenharmony_ci{
6298c2ecf20Sopenharmony_ci	struct hvsock *hvs = vsk->trans;
6308c2ecf20Sopenharmony_ci	struct vmbus_channel *chan = hvs->chan;
6318c2ecf20Sopenharmony_ci	struct hvs_send_buf *send_buf;
6328c2ecf20Sopenharmony_ci	ssize_t to_write, max_writable;
6338c2ecf20Sopenharmony_ci	ssize_t ret = 0;
6348c2ecf20Sopenharmony_ci	ssize_t bytes_written = 0;
6358c2ecf20Sopenharmony_ci
6368c2ecf20Sopenharmony_ci	BUILD_BUG_ON(sizeof(*send_buf) != HV_HYP_PAGE_SIZE);
6378c2ecf20Sopenharmony_ci
6388c2ecf20Sopenharmony_ci	send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
6398c2ecf20Sopenharmony_ci	if (!send_buf)
6408c2ecf20Sopenharmony_ci		return -ENOMEM;
6418c2ecf20Sopenharmony_ci
6428c2ecf20Sopenharmony_ci	/* Reader(s) could be draining data from the channel as we write.
6438c2ecf20Sopenharmony_ci	 * Maximize bandwidth, by iterating until the channel is found to be
6448c2ecf20Sopenharmony_ci	 * full.
6458c2ecf20Sopenharmony_ci	 */
6468c2ecf20Sopenharmony_ci	while (len) {
6478c2ecf20Sopenharmony_ci		max_writable = hvs_channel_writable_bytes(chan);
6488c2ecf20Sopenharmony_ci		if (!max_writable)
6498c2ecf20Sopenharmony_ci			break;
6508c2ecf20Sopenharmony_ci		to_write = min_t(ssize_t, len, max_writable);
6518c2ecf20Sopenharmony_ci		to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
6528c2ecf20Sopenharmony_ci		/* memcpy_from_msg is safe for loop as it advances the offsets
6538c2ecf20Sopenharmony_ci		 * within the message iterator.
6548c2ecf20Sopenharmony_ci		 */
6558c2ecf20Sopenharmony_ci		ret = memcpy_from_msg(send_buf->data, msg, to_write);
6568c2ecf20Sopenharmony_ci		if (ret < 0)
6578c2ecf20Sopenharmony_ci			goto out;
6588c2ecf20Sopenharmony_ci
6598c2ecf20Sopenharmony_ci		ret = hvs_send_data(hvs->chan, send_buf, to_write);
6608c2ecf20Sopenharmony_ci		if (ret < 0)
6618c2ecf20Sopenharmony_ci			goto out;
6628c2ecf20Sopenharmony_ci
6638c2ecf20Sopenharmony_ci		bytes_written += to_write;
6648c2ecf20Sopenharmony_ci		len -= to_write;
6658c2ecf20Sopenharmony_ci	}
6668c2ecf20Sopenharmony_ciout:
6678c2ecf20Sopenharmony_ci	/* If any data has been sent, return that */
6688c2ecf20Sopenharmony_ci	if (bytes_written)
6698c2ecf20Sopenharmony_ci		ret = bytes_written;
6708c2ecf20Sopenharmony_ci	kfree(send_buf);
6718c2ecf20Sopenharmony_ci	return ret;
6728c2ecf20Sopenharmony_ci}
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_cistatic s64 hvs_stream_has_data(struct vsock_sock *vsk)
6758c2ecf20Sopenharmony_ci{
6768c2ecf20Sopenharmony_ci	struct hvsock *hvs = vsk->trans;
6778c2ecf20Sopenharmony_ci	s64 ret;
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_ci	if (hvs->recv_data_len > 0)
6808c2ecf20Sopenharmony_ci		return 1;
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_ci	switch (hvs_channel_readable_payload(hvs->chan)) {
6838c2ecf20Sopenharmony_ci	case 1:
6848c2ecf20Sopenharmony_ci		ret = 1;
6858c2ecf20Sopenharmony_ci		break;
6868c2ecf20Sopenharmony_ci	case 0:
6878c2ecf20Sopenharmony_ci		vsk->peer_shutdown |= SEND_SHUTDOWN;
6888c2ecf20Sopenharmony_ci		ret = 0;
6898c2ecf20Sopenharmony_ci		break;
6908c2ecf20Sopenharmony_ci	default: /* -1 */
6918c2ecf20Sopenharmony_ci		ret = 0;
6928c2ecf20Sopenharmony_ci		break;
6938c2ecf20Sopenharmony_ci	}
6948c2ecf20Sopenharmony_ci
6958c2ecf20Sopenharmony_ci	return ret;
6968c2ecf20Sopenharmony_ci}
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_cistatic s64 hvs_stream_has_space(struct vsock_sock *vsk)
6998c2ecf20Sopenharmony_ci{
7008c2ecf20Sopenharmony_ci	struct hvsock *hvs = vsk->trans;
7018c2ecf20Sopenharmony_ci
7028c2ecf20Sopenharmony_ci	return hvs_channel_writable_bytes(hvs->chan);
7038c2ecf20Sopenharmony_ci}
7048c2ecf20Sopenharmony_ci
7058c2ecf20Sopenharmony_cistatic u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
7068c2ecf20Sopenharmony_ci{
7078c2ecf20Sopenharmony_ci	return HVS_MTU_SIZE + 1;
7088c2ecf20Sopenharmony_ci}
7098c2ecf20Sopenharmony_ci
7108c2ecf20Sopenharmony_cistatic bool hvs_stream_is_active(struct vsock_sock *vsk)
7118c2ecf20Sopenharmony_ci{
7128c2ecf20Sopenharmony_ci	struct hvsock *hvs = vsk->trans;
7138c2ecf20Sopenharmony_ci
7148c2ecf20Sopenharmony_ci	return hvs->chan != NULL;
7158c2ecf20Sopenharmony_ci}
7168c2ecf20Sopenharmony_ci
7178c2ecf20Sopenharmony_cistatic bool hvs_stream_allow(u32 cid, u32 port)
7188c2ecf20Sopenharmony_ci{
7198c2ecf20Sopenharmony_ci	if (cid == VMADDR_CID_HOST)
7208c2ecf20Sopenharmony_ci		return true;
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci	return false;
7238c2ecf20Sopenharmony_ci}
7248c2ecf20Sopenharmony_ci
7258c2ecf20Sopenharmony_cistatic
7268c2ecf20Sopenharmony_ciint hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable)
7278c2ecf20Sopenharmony_ci{
7288c2ecf20Sopenharmony_ci	struct hvsock *hvs = vsk->trans;
7298c2ecf20Sopenharmony_ci
7308c2ecf20Sopenharmony_ci	*readable = hvs_channel_readable(hvs->chan);
7318c2ecf20Sopenharmony_ci	return 0;
7328c2ecf20Sopenharmony_ci}
7338c2ecf20Sopenharmony_ci
7348c2ecf20Sopenharmony_cistatic
7358c2ecf20Sopenharmony_ciint hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable)
7368c2ecf20Sopenharmony_ci{
7378c2ecf20Sopenharmony_ci	*writable = hvs_stream_has_space(vsk) > 0;
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_ci	return 0;
7408c2ecf20Sopenharmony_ci}
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_cistatic
7438c2ecf20Sopenharmony_ciint hvs_notify_recv_init(struct vsock_sock *vsk, size_t target,
7448c2ecf20Sopenharmony_ci			 struct vsock_transport_recv_notify_data *d)
7458c2ecf20Sopenharmony_ci{
7468c2ecf20Sopenharmony_ci	return 0;
7478c2ecf20Sopenharmony_ci}
7488c2ecf20Sopenharmony_ci
7498c2ecf20Sopenharmony_cistatic
7508c2ecf20Sopenharmony_ciint hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target,
7518c2ecf20Sopenharmony_ci			      struct vsock_transport_recv_notify_data *d)
7528c2ecf20Sopenharmony_ci{
7538c2ecf20Sopenharmony_ci	return 0;
7548c2ecf20Sopenharmony_ci}
7558c2ecf20Sopenharmony_ci
7568c2ecf20Sopenharmony_cistatic
7578c2ecf20Sopenharmony_ciint hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target,
7588c2ecf20Sopenharmony_ci				struct vsock_transport_recv_notify_data *d)
7598c2ecf20Sopenharmony_ci{
7608c2ecf20Sopenharmony_ci	return 0;
7618c2ecf20Sopenharmony_ci}
7628c2ecf20Sopenharmony_ci
7638c2ecf20Sopenharmony_cistatic
7648c2ecf20Sopenharmony_ciint hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target,
7658c2ecf20Sopenharmony_ci				 ssize_t copied, bool data_read,
7668c2ecf20Sopenharmony_ci				 struct vsock_transport_recv_notify_data *d)
7678c2ecf20Sopenharmony_ci{
7688c2ecf20Sopenharmony_ci	return 0;
7698c2ecf20Sopenharmony_ci}
7708c2ecf20Sopenharmony_ci
/* No-op send notification hook: ignores its arguments, returns 0. */
static int hvs_notify_send_init(struct vsock_sock *vsk,
				struct vsock_transport_send_notify_data *d)
{
	return 0;
}
7778c2ecf20Sopenharmony_ci
/* No-op send notification hook: ignores its arguments, returns 0. */
static int hvs_notify_send_pre_block(struct vsock_sock *vsk,
				     struct vsock_transport_send_notify_data *d)
{
	return 0;
}
7848c2ecf20Sopenharmony_ci
/* No-op send notification hook: ignores its arguments, returns 0. */
static int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk,
				       struct vsock_transport_send_notify_data *d)
{
	return 0;
}
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_cistatic
7938c2ecf20Sopenharmony_ciint hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
7948c2ecf20Sopenharmony_ci				 struct vsock_transport_send_notify_data *d)
7958c2ecf20Sopenharmony_ci{
7968c2ecf20Sopenharmony_ci	return 0;
7978c2ecf20Sopenharmony_ci}
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_cistatic struct vsock_transport hvs_transport = {
8008c2ecf20Sopenharmony_ci	.module                   = THIS_MODULE,
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ci	.get_local_cid            = hvs_get_local_cid,
8038c2ecf20Sopenharmony_ci
8048c2ecf20Sopenharmony_ci	.init                     = hvs_sock_init,
8058c2ecf20Sopenharmony_ci	.destruct                 = hvs_destruct,
8068c2ecf20Sopenharmony_ci	.release                  = hvs_release,
8078c2ecf20Sopenharmony_ci	.connect                  = hvs_connect,
8088c2ecf20Sopenharmony_ci	.shutdown                 = hvs_shutdown,
8098c2ecf20Sopenharmony_ci
8108c2ecf20Sopenharmony_ci	.dgram_bind               = hvs_dgram_bind,
8118c2ecf20Sopenharmony_ci	.dgram_dequeue            = hvs_dgram_dequeue,
8128c2ecf20Sopenharmony_ci	.dgram_enqueue            = hvs_dgram_enqueue,
8138c2ecf20Sopenharmony_ci	.dgram_allow              = hvs_dgram_allow,
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci	.stream_dequeue           = hvs_stream_dequeue,
8168c2ecf20Sopenharmony_ci	.stream_enqueue           = hvs_stream_enqueue,
8178c2ecf20Sopenharmony_ci	.stream_has_data          = hvs_stream_has_data,
8188c2ecf20Sopenharmony_ci	.stream_has_space         = hvs_stream_has_space,
8198c2ecf20Sopenharmony_ci	.stream_rcvhiwat          = hvs_stream_rcvhiwat,
8208c2ecf20Sopenharmony_ci	.stream_is_active         = hvs_stream_is_active,
8218c2ecf20Sopenharmony_ci	.stream_allow             = hvs_stream_allow,
8228c2ecf20Sopenharmony_ci
8238c2ecf20Sopenharmony_ci	.notify_poll_in           = hvs_notify_poll_in,
8248c2ecf20Sopenharmony_ci	.notify_poll_out          = hvs_notify_poll_out,
8258c2ecf20Sopenharmony_ci	.notify_recv_init         = hvs_notify_recv_init,
8268c2ecf20Sopenharmony_ci	.notify_recv_pre_block    = hvs_notify_recv_pre_block,
8278c2ecf20Sopenharmony_ci	.notify_recv_pre_dequeue  = hvs_notify_recv_pre_dequeue,
8288c2ecf20Sopenharmony_ci	.notify_recv_post_dequeue = hvs_notify_recv_post_dequeue,
8298c2ecf20Sopenharmony_ci	.notify_send_init         = hvs_notify_send_init,
8308c2ecf20Sopenharmony_ci	.notify_send_pre_block    = hvs_notify_send_pre_block,
8318c2ecf20Sopenharmony_ci	.notify_send_pre_enqueue  = hvs_notify_send_pre_enqueue,
8328c2ecf20Sopenharmony_ci	.notify_send_post_enqueue = hvs_notify_send_post_enqueue,
8338c2ecf20Sopenharmony_ci
8348c2ecf20Sopenharmony_ci};
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_cistatic bool hvs_check_transport(struct vsock_sock *vsk)
8378c2ecf20Sopenharmony_ci{
8388c2ecf20Sopenharmony_ci	return vsk->transport == &hvs_transport;
8398c2ecf20Sopenharmony_ci}
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_cistatic int hvs_probe(struct hv_device *hdev,
8428c2ecf20Sopenharmony_ci		     const struct hv_vmbus_device_id *dev_id)
8438c2ecf20Sopenharmony_ci{
8448c2ecf20Sopenharmony_ci	struct vmbus_channel *chan = hdev->channel;
8458c2ecf20Sopenharmony_ci
8468c2ecf20Sopenharmony_ci	hvs_open_connection(chan);
8478c2ecf20Sopenharmony_ci
8488c2ecf20Sopenharmony_ci	/* Always return success to suppress the unnecessary error message
8498c2ecf20Sopenharmony_ci	 * in vmbus_probe(): on error the host will rescind the device in
8508c2ecf20Sopenharmony_ci	 * 30 seconds and we can do cleanup at that time in
8518c2ecf20Sopenharmony_ci	 * vmbus_onoffer_rescind().
8528c2ecf20Sopenharmony_ci	 */
8538c2ecf20Sopenharmony_ci	return 0;
8548c2ecf20Sopenharmony_ci}
8558c2ecf20Sopenharmony_ci
8568c2ecf20Sopenharmony_cistatic int hvs_remove(struct hv_device *hdev)
8578c2ecf20Sopenharmony_ci{
8588c2ecf20Sopenharmony_ci	struct vmbus_channel *chan = hdev->channel;
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_ci	vmbus_close(chan);
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci	return 0;
8638c2ecf20Sopenharmony_ci}
8648c2ecf20Sopenharmony_ci
8658c2ecf20Sopenharmony_ci/* hv_sock connections can not persist across hibernation, and all the hv_sock
8668c2ecf20Sopenharmony_ci * channels are forced to be rescinded before hibernation: see
8678c2ecf20Sopenharmony_ci * vmbus_bus_suspend(). Here the dummy hvs_suspend() and hvs_resume()
8688c2ecf20Sopenharmony_ci * are only needed because hibernation requires that every vmbus device's
8698c2ecf20Sopenharmony_ci * driver should have a .suspend and .resume callback: see vmbus_suspend().
8708c2ecf20Sopenharmony_ci */
static int hvs_suspend(struct hv_device *hv_dev)
{
	/* Placeholder callback: nothing to do, report success */
	return 0;
}
8768c2ecf20Sopenharmony_ci
static int hvs_resume(struct hv_device *dev)
{
	/* Placeholder callback: nothing to do, report success */
	return 0;
}
8828c2ecf20Sopenharmony_ci
8838c2ecf20Sopenharmony_ci/* This isn't really used. See vmbus_match() and vmbus_probe() */
8848c2ecf20Sopenharmony_cistatic const struct hv_vmbus_device_id id_table[] = {
8858c2ecf20Sopenharmony_ci	{},
8868c2ecf20Sopenharmony_ci};
8878c2ecf20Sopenharmony_ci
8888c2ecf20Sopenharmony_cistatic struct hv_driver hvs_drv = {
8898c2ecf20Sopenharmony_ci	.name		= "hv_sock",
8908c2ecf20Sopenharmony_ci	.hvsock		= true,
8918c2ecf20Sopenharmony_ci	.id_table	= id_table,
8928c2ecf20Sopenharmony_ci	.probe		= hvs_probe,
8938c2ecf20Sopenharmony_ci	.remove		= hvs_remove,
8948c2ecf20Sopenharmony_ci	.suspend	= hvs_suspend,
8958c2ecf20Sopenharmony_ci	.resume		= hvs_resume,
8968c2ecf20Sopenharmony_ci};
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_cistatic int __init hvs_init(void)
8998c2ecf20Sopenharmony_ci{
9008c2ecf20Sopenharmony_ci	int ret;
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci	if (vmbus_proto_version < VERSION_WIN10)
9038c2ecf20Sopenharmony_ci		return -ENODEV;
9048c2ecf20Sopenharmony_ci
9058c2ecf20Sopenharmony_ci	ret = vmbus_driver_register(&hvs_drv);
9068c2ecf20Sopenharmony_ci	if (ret != 0)
9078c2ecf20Sopenharmony_ci		return ret;
9088c2ecf20Sopenharmony_ci
9098c2ecf20Sopenharmony_ci	ret = vsock_core_register(&hvs_transport, VSOCK_TRANSPORT_F_G2H);
9108c2ecf20Sopenharmony_ci	if (ret) {
9118c2ecf20Sopenharmony_ci		vmbus_driver_unregister(&hvs_drv);
9128c2ecf20Sopenharmony_ci		return ret;
9138c2ecf20Sopenharmony_ci	}
9148c2ecf20Sopenharmony_ci
9158c2ecf20Sopenharmony_ci	return 0;
9168c2ecf20Sopenharmony_ci}
9178c2ecf20Sopenharmony_ci
9188c2ecf20Sopenharmony_cistatic void __exit hvs_exit(void)
9198c2ecf20Sopenharmony_ci{
9208c2ecf20Sopenharmony_ci	vsock_core_unregister(&hvs_transport);
9218c2ecf20Sopenharmony_ci	vmbus_driver_unregister(&hvs_drv);
9228c2ecf20Sopenharmony_ci}
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_cimodule_init(hvs_init);
9258c2ecf20Sopenharmony_cimodule_exit(hvs_exit);
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Hyper-V Sockets");
9288c2ecf20Sopenharmony_ciMODULE_VERSION("1.0.0");
9298c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
9308c2ecf20Sopenharmony_ciMODULE_ALIAS_NETPROTO(PF_VSOCK);
931