1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * common code for virtio vsock
4 *
5 * Copyright (C) 2013-2015 Red Hat, Inc.
6 * Author: Asias He <asias@redhat.com>
7 *         Stefan Hajnoczi <stefanha@redhat.com>
8 */
9#include <linux/spinlock.h>
10#include <linux/module.h>
11#include <linux/sched/signal.h>
12#include <linux/ctype.h>
13#include <linux/list.h>
14#include <linux/virtio_vsock.h>
15#include <uapi/linux/vsockmon.h>
16
17#include <net/sock.h>
18#include <net/af_vsock.h>
19
20#define CREATE_TRACE_POINTS
21#include <trace/events/vsock_virtio_transport_common.h>
22
23/* How long to wait for graceful shutdown of a connection */
24#define VSOCK_CLOSE_TIMEOUT (8 * HZ)
25
26/* Threshold for detecting small packets to copy */
27#define GOOD_COPY_LEN  128
28
29static const struct virtio_transport *
30virtio_transport_get_ops(struct vsock_sock *vsk)
31{
32	const struct vsock_transport *t = vsock_core_get_transport(vsk);
33
34	if (WARN_ON(!t))
35		return NULL;
36
37	return container_of(t, struct virtio_transport, transport);
38}
39
40static struct virtio_vsock_pkt *
41virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
42			   size_t len,
43			   u32 src_cid,
44			   u32 src_port,
45			   u32 dst_cid,
46			   u32 dst_port)
47{
48	struct virtio_vsock_pkt *pkt;
49	int err;
50
51	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
52	if (!pkt)
53		return NULL;
54
55	pkt->hdr.type		= cpu_to_le16(info->type);
56	pkt->hdr.op		= cpu_to_le16(info->op);
57	pkt->hdr.src_cid	= cpu_to_le64(src_cid);
58	pkt->hdr.dst_cid	= cpu_to_le64(dst_cid);
59	pkt->hdr.src_port	= cpu_to_le32(src_port);
60	pkt->hdr.dst_port	= cpu_to_le32(dst_port);
61	pkt->hdr.flags		= cpu_to_le32(info->flags);
62	pkt->len		= len;
63	pkt->hdr.len		= cpu_to_le32(len);
64	pkt->reply		= info->reply;
65	pkt->vsk		= info->vsk;
66
67	if (info->msg && len > 0) {
68		pkt->buf = kmalloc(len, GFP_KERNEL);
69		if (!pkt->buf)
70			goto out_pkt;
71
72		pkt->buf_len = len;
73
74		err = memcpy_from_msg(pkt->buf, info->msg, len);
75		if (err)
76			goto out;
77	}
78
79	trace_virtio_transport_alloc_pkt(src_cid, src_port,
80					 dst_cid, dst_port,
81					 len,
82					 info->type,
83					 info->op,
84					 info->flags);
85
86	return pkt;
87
88out:
89	kfree(pkt->buf);
90out_pkt:
91	kfree(pkt);
92	return NULL;
93}
94
95/* Packet capture */
96static struct sk_buff *virtio_transport_build_skb(void *opaque)
97{
98	struct virtio_vsock_pkt *pkt = opaque;
99	struct af_vsockmon_hdr *hdr;
100	struct sk_buff *skb;
101	size_t payload_len;
102	void *payload_buf;
103
104	/* A packet could be split to fit the RX buffer, so we can retrieve
105	 * the payload length from the header and the buffer pointer taking
106	 * care of the offset in the original packet.
107	 */
108	payload_len = le32_to_cpu(pkt->hdr.len);
109	payload_buf = pkt->buf + pkt->off;
110
111	skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len,
112			GFP_ATOMIC);
113	if (!skb)
114		return NULL;
115
116	hdr = skb_put(skb, sizeof(*hdr));
117
118	/* pkt->hdr is little-endian so no need to byteswap here */
119	hdr->src_cid = pkt->hdr.src_cid;
120	hdr->src_port = pkt->hdr.src_port;
121	hdr->dst_cid = pkt->hdr.dst_cid;
122	hdr->dst_port = pkt->hdr.dst_port;
123
124	hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
125	hdr->len = cpu_to_le16(sizeof(pkt->hdr));
126	memset(hdr->reserved, 0, sizeof(hdr->reserved));
127
128	switch (le16_to_cpu(pkt->hdr.op)) {
129	case VIRTIO_VSOCK_OP_REQUEST:
130	case VIRTIO_VSOCK_OP_RESPONSE:
131		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
132		break;
133	case VIRTIO_VSOCK_OP_RST:
134	case VIRTIO_VSOCK_OP_SHUTDOWN:
135		hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
136		break;
137	case VIRTIO_VSOCK_OP_RW:
138		hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
139		break;
140	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
141	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
142		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
143		break;
144	default:
145		hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
146		break;
147	}
148
149	skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));
150
151	if (payload_len) {
152		skb_put_data(skb, payload_buf, payload_len);
153	}
154
155	return skb;
156}
157
158void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
159{
160	if (pkt->tap_delivered)
161		return;
162
163	vsock_deliver_tap(virtio_transport_build_skb, pkt);
164	pkt->tap_delivered = true;
165}
166EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
167
168/* This function can only be used on connecting/connected sockets,
169 * since a socket assigned to a transport is required.
170 *
171 * Do not use on listener sockets!
172 */
173static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
174					  struct virtio_vsock_pkt_info *info)
175{
176	u32 src_cid, src_port, dst_cid, dst_port;
177	const struct virtio_transport *t_ops;
178	struct virtio_vsock_sock *vvs;
179	struct virtio_vsock_pkt *pkt;
180	u32 pkt_len = info->pkt_len;
181
182	t_ops = virtio_transport_get_ops(vsk);
183	if (unlikely(!t_ops))
184		return -EFAULT;
185
186	src_cid = t_ops->transport.get_local_cid();
187	src_port = vsk->local_addr.svm_port;
188	if (!info->remote_cid) {
189		dst_cid	= vsk->remote_addr.svm_cid;
190		dst_port = vsk->remote_addr.svm_port;
191	} else {
192		dst_cid = info->remote_cid;
193		dst_port = info->remote_port;
194	}
195
196	vvs = vsk->trans;
197
198	/* we can send less than pkt_len bytes */
199	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
200		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
201
202	/* virtio_transport_get_credit might return less than pkt_len credit */
203	pkt_len = virtio_transport_get_credit(vvs, pkt_len);
204
205	/* Do not send zero length OP_RW pkt */
206	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
207		return pkt_len;
208
209	pkt = virtio_transport_alloc_pkt(info, pkt_len,
210					 src_cid, src_port,
211					 dst_cid, dst_port);
212	if (!pkt) {
213		virtio_transport_put_credit(vvs, pkt_len);
214		return -ENOMEM;
215	}
216
217	virtio_transport_inc_tx_pkt(vvs, pkt);
218
219	return t_ops->send_pkt(pkt);
220}
221
222static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
223					struct virtio_vsock_pkt *pkt)
224{
225	if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
226		return false;
227
228	vvs->rx_bytes += pkt->len;
229	return true;
230}
231
232static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
233					struct virtio_vsock_pkt *pkt)
234{
235	vvs->rx_bytes -= pkt->len;
236	vvs->fwd_cnt += pkt->len;
237}
238
239void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
240{
241	spin_lock_bh(&vvs->rx_lock);
242	vvs->last_fwd_cnt = vvs->fwd_cnt;
243	pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
244	pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
245	spin_unlock_bh(&vvs->rx_lock);
246}
247EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
248
249u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
250{
251	u32 ret;
252
253	spin_lock_bh(&vvs->tx_lock);
254	ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
255	if (ret > credit)
256		ret = credit;
257	vvs->tx_cnt += ret;
258	spin_unlock_bh(&vvs->tx_lock);
259
260	return ret;
261}
262EXPORT_SYMBOL_GPL(virtio_transport_get_credit);
263
264void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
265{
266	spin_lock_bh(&vvs->tx_lock);
267	vvs->tx_cnt -= credit;
268	spin_unlock_bh(&vvs->tx_lock);
269}
270EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
271
272static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
273					       int type,
274					       struct virtio_vsock_hdr *hdr)
275{
276	struct virtio_vsock_pkt_info info = {
277		.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
278		.type = type,
279		.vsk = vsk,
280	};
281
282	return virtio_transport_send_pkt_info(vsk, &info);
283}
284
285static ssize_t
286virtio_transport_stream_do_peek(struct vsock_sock *vsk,
287				struct msghdr *msg,
288				size_t len)
289{
290	struct virtio_vsock_sock *vvs = vsk->trans;
291	struct virtio_vsock_pkt *pkt;
292	size_t bytes, total = 0, off;
293	int err = -EFAULT;
294
295	spin_lock_bh(&vvs->rx_lock);
296
297	list_for_each_entry(pkt, &vvs->rx_queue, list) {
298		off = pkt->off;
299
300		if (total == len)
301			break;
302
303		while (total < len && off < pkt->len) {
304			bytes = len - total;
305			if (bytes > pkt->len - off)
306				bytes = pkt->len - off;
307
308			/* sk_lock is held by caller so no one else can dequeue.
309			 * Unlock rx_lock since memcpy_to_msg() may sleep.
310			 */
311			spin_unlock_bh(&vvs->rx_lock);
312
313			err = memcpy_to_msg(msg, pkt->buf + off, bytes);
314			if (err)
315				goto out;
316
317			spin_lock_bh(&vvs->rx_lock);
318
319			total += bytes;
320			off += bytes;
321		}
322	}
323
324	spin_unlock_bh(&vvs->rx_lock);
325
326	return total;
327
328out:
329	if (total)
330		err = total;
331	return err;
332}
333
334static ssize_t
335virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
336				   struct msghdr *msg,
337				   size_t len)
338{
339	struct virtio_vsock_sock *vvs = vsk->trans;
340	struct virtio_vsock_pkt *pkt;
341	size_t bytes, total = 0;
342	u32 free_space;
343	u32 fwd_cnt_delta;
344	bool low_rx_bytes;
345	int err = -EFAULT;
346
347	spin_lock_bh(&vvs->rx_lock);
348	while (total < len && !list_empty(&vvs->rx_queue)) {
349		pkt = list_first_entry(&vvs->rx_queue,
350				       struct virtio_vsock_pkt, list);
351
352		bytes = len - total;
353		if (bytes > pkt->len - pkt->off)
354			bytes = pkt->len - pkt->off;
355
356		/* sk_lock is held by caller so no one else can dequeue.
357		 * Unlock rx_lock since memcpy_to_msg() may sleep.
358		 */
359		spin_unlock_bh(&vvs->rx_lock);
360
361		err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
362		if (err)
363			goto out;
364
365		spin_lock_bh(&vvs->rx_lock);
366
367		total += bytes;
368		pkt->off += bytes;
369		if (pkt->off == pkt->len) {
370			virtio_transport_dec_rx_pkt(vvs, pkt);
371			list_del(&pkt->list);
372			virtio_transport_free_pkt(pkt);
373		}
374	}
375
376	fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
377	free_space = vvs->buf_alloc - fwd_cnt_delta;
378	low_rx_bytes = (vvs->rx_bytes <
379			sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX));
380
381	spin_unlock_bh(&vvs->rx_lock);
382
383	/* To reduce the number of credit update messages,
384	 * don't update credits as long as lots of space is available.
385	 * Note: the limit chosen here is arbitrary. Setting the limit
386	 * too high causes extra messages. Too low causes transmitter
387	 * stalls. As stalls are in theory more expensive than extra
388	 * messages, we set the limit to a high value. TODO: experiment
389	 * with different values. Also send credit update message when
390	 * number of bytes in rx queue is not enough to wake up reader.
391	 */
392	if (fwd_cnt_delta &&
393	    (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes)) {
394		virtio_transport_send_credit_update(vsk,
395						    VIRTIO_VSOCK_TYPE_STREAM,
396						    NULL);
397	}
398
399	return total;
400
401out:
402	if (total)
403		err = total;
404	return err;
405}
406
407ssize_t
408virtio_transport_stream_dequeue(struct vsock_sock *vsk,
409				struct msghdr *msg,
410				size_t len, int flags)
411{
412	if (flags & MSG_PEEK)
413		return virtio_transport_stream_do_peek(vsk, msg, len);
414	else
415		return virtio_transport_stream_do_dequeue(vsk, msg, len);
416}
417EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
418
419int
420virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
421			       struct msghdr *msg,
422			       size_t len, int flags)
423{
424	return -EOPNOTSUPP;
425}
426EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
427
428s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
429{
430	struct virtio_vsock_sock *vvs = vsk->trans;
431	s64 bytes;
432
433	spin_lock_bh(&vvs->rx_lock);
434	bytes = vvs->rx_bytes;
435	spin_unlock_bh(&vvs->rx_lock);
436
437	return bytes;
438}
439EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);
440
441static s64 virtio_transport_has_space(struct vsock_sock *vsk)
442{
443	struct virtio_vsock_sock *vvs = vsk->trans;
444	s64 bytes;
445
446	bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
447	if (bytes < 0)
448		bytes = 0;
449
450	return bytes;
451}
452
453s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
454{
455	struct virtio_vsock_sock *vvs = vsk->trans;
456	s64 bytes;
457
458	spin_lock_bh(&vvs->tx_lock);
459	bytes = virtio_transport_has_space(vsk);
460	spin_unlock_bh(&vvs->tx_lock);
461
462	return bytes;
463}
464EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);
465
466int virtio_transport_do_socket_init(struct vsock_sock *vsk,
467				    struct vsock_sock *psk)
468{
469	struct virtio_vsock_sock *vvs;
470
471	vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
472	if (!vvs)
473		return -ENOMEM;
474
475	vsk->trans = vvs;
476	vvs->vsk = vsk;
477	if (psk && psk->trans) {
478		struct virtio_vsock_sock *ptrans = psk->trans;
479
480		vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
481	}
482
483	if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
484		vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;
485
486	vvs->buf_alloc = vsk->buffer_size;
487
488	spin_lock_init(&vvs->rx_lock);
489	spin_lock_init(&vvs->tx_lock);
490	INIT_LIST_HEAD(&vvs->rx_queue);
491
492	return 0;
493}
494EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);
495
496/* sk_lock held by the caller */
497void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
498{
499	struct virtio_vsock_sock *vvs = vsk->trans;
500
501	if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
502		*val = VIRTIO_VSOCK_MAX_BUF_SIZE;
503
504	vvs->buf_alloc = *val;
505
506	virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
507					    NULL);
508}
509EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);
510
511int
512virtio_transport_notify_poll_in(struct vsock_sock *vsk,
513				size_t target,
514				bool *data_ready_now)
515{
516	if (vsock_stream_has_data(vsk))
517		*data_ready_now = true;
518	else
519		*data_ready_now = false;
520
521	return 0;
522}
523EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);
524
525int
526virtio_transport_notify_poll_out(struct vsock_sock *vsk,
527				 size_t target,
528				 bool *space_avail_now)
529{
530	s64 free_space;
531
532	free_space = vsock_stream_has_space(vsk);
533	if (free_space > 0)
534		*space_avail_now = true;
535	else if (free_space == 0)
536		*space_avail_now = false;
537
538	return 0;
539}
540EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);
541
542int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
543	size_t target, struct vsock_transport_recv_notify_data *data)
544{
545	return 0;
546}
547EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);
548
549int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
550	size_t target, struct vsock_transport_recv_notify_data *data)
551{
552	return 0;
553}
554EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);
555
556int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
557	size_t target, struct vsock_transport_recv_notify_data *data)
558{
559	return 0;
560}
561EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);
562
563int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
564	size_t target, ssize_t copied, bool data_read,
565	struct vsock_transport_recv_notify_data *data)
566{
567	return 0;
568}
569EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);
570
/* No-op send notification hook: ignores its arguments and returns 0. */
int virtio_transport_notify_send_init(struct vsock_sock *vsk,
				      struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);
577
/* No-op send notification hook: ignores its arguments and returns 0. */
int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
					   struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);
584
/* No-op send notification hook: ignores its arguments and returns 0. */
int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
					     struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);
591
592int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
593	ssize_t written, struct vsock_transport_send_notify_data *data)
594{
595	return 0;
596}
597EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);
598
599u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
600{
601	return vsk->buffer_size;
602}
603EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);
604
605bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
606{
607	return true;
608}
609EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);
610
611bool virtio_transport_stream_allow(u32 cid, u32 port)
612{
613	return true;
614}
615EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
616
617int virtio_transport_dgram_bind(struct vsock_sock *vsk,
618				struct sockaddr_vm *addr)
619{
620	return -EOPNOTSUPP;
621}
622EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
623
624bool virtio_transport_dgram_allow(u32 cid, u32 port)
625{
626	return false;
627}
628EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
629
630int virtio_transport_connect(struct vsock_sock *vsk)
631{
632	struct virtio_vsock_pkt_info info = {
633		.op = VIRTIO_VSOCK_OP_REQUEST,
634		.type = VIRTIO_VSOCK_TYPE_STREAM,
635		.vsk = vsk,
636	};
637
638	return virtio_transport_send_pkt_info(vsk, &info);
639}
640EXPORT_SYMBOL_GPL(virtio_transport_connect);
641
642int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
643{
644	struct virtio_vsock_pkt_info info = {
645		.op = VIRTIO_VSOCK_OP_SHUTDOWN,
646		.type = VIRTIO_VSOCK_TYPE_STREAM,
647		.flags = (mode & RCV_SHUTDOWN ?
648			  VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
649			 (mode & SEND_SHUTDOWN ?
650			  VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
651		.vsk = vsk,
652	};
653
654	return virtio_transport_send_pkt_info(vsk, &info);
655}
656EXPORT_SYMBOL_GPL(virtio_transport_shutdown);
657
658int
659virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
660			       struct sockaddr_vm *remote_addr,
661			       struct msghdr *msg,
662			       size_t dgram_len)
663{
664	return -EOPNOTSUPP;
665}
666EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
667
668ssize_t
669virtio_transport_stream_enqueue(struct vsock_sock *vsk,
670				struct msghdr *msg,
671				size_t len)
672{
673	struct virtio_vsock_pkt_info info = {
674		.op = VIRTIO_VSOCK_OP_RW,
675		.type = VIRTIO_VSOCK_TYPE_STREAM,
676		.msg = msg,
677		.pkt_len = len,
678		.vsk = vsk,
679	};
680
681	return virtio_transport_send_pkt_info(vsk, &info);
682}
683EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);
684
685void virtio_transport_destruct(struct vsock_sock *vsk)
686{
687	struct virtio_vsock_sock *vvs = vsk->trans;
688
689	kfree(vvs);
690}
691EXPORT_SYMBOL_GPL(virtio_transport_destruct);
692
693static int virtio_transport_reset(struct vsock_sock *vsk,
694				  struct virtio_vsock_pkt *pkt)
695{
696	struct virtio_vsock_pkt_info info = {
697		.op = VIRTIO_VSOCK_OP_RST,
698		.type = VIRTIO_VSOCK_TYPE_STREAM,
699		.reply = !!pkt,
700		.vsk = vsk,
701	};
702
703	/* Send RST only if the original pkt is not a RST pkt */
704	if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
705		return 0;
706
707	return virtio_transport_send_pkt_info(vsk, &info);
708}
709
710/* Normally packets are associated with a socket.  There may be no socket if an
711 * attempt was made to connect to a socket that does not exist.
712 */
713static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
714					  struct virtio_vsock_pkt *pkt)
715{
716	struct virtio_vsock_pkt *reply;
717	struct virtio_vsock_pkt_info info = {
718		.op = VIRTIO_VSOCK_OP_RST,
719		.type = le16_to_cpu(pkt->hdr.type),
720		.reply = true,
721	};
722
723	/* Send RST only if the original pkt is not a RST pkt */
724	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
725		return 0;
726
727	reply = virtio_transport_alloc_pkt(&info, 0,
728					   le64_to_cpu(pkt->hdr.dst_cid),
729					   le32_to_cpu(pkt->hdr.dst_port),
730					   le64_to_cpu(pkt->hdr.src_cid),
731					   le32_to_cpu(pkt->hdr.src_port));
732	if (!reply)
733		return -ENOMEM;
734
735	if (!t) {
736		virtio_transport_free_pkt(reply);
737		return -ENOTCONN;
738	}
739
740	return t->send_pkt(reply);
741}
742
743/* This function should be called with sk_lock held and SOCK_DONE set */
744static void virtio_transport_remove_sock(struct vsock_sock *vsk)
745{
746	struct virtio_vsock_sock *vvs = vsk->trans;
747	struct virtio_vsock_pkt *pkt, *tmp;
748
749	/* We don't need to take rx_lock, as the socket is closing and we are
750	 * removing it.
751	 */
752	list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
753		list_del(&pkt->list);
754		virtio_transport_free_pkt(pkt);
755	}
756
757	vsock_remove_sock(vsk);
758}
759
760static void virtio_transport_wait_close(struct sock *sk, long timeout)
761{
762	if (timeout) {
763		DEFINE_WAIT_FUNC(wait, woken_wake_function);
764
765		add_wait_queue(sk_sleep(sk), &wait);
766
767		do {
768			if (sk_wait_event(sk, &timeout,
769					  sock_flag(sk, SOCK_DONE), &wait))
770				break;
771		} while (!signal_pending(current) && timeout);
772
773		remove_wait_queue(sk_sleep(sk), &wait);
774	}
775}
776
777static void virtio_transport_do_close(struct vsock_sock *vsk,
778				      bool cancel_timeout)
779{
780	struct sock *sk = sk_vsock(vsk);
781
782	sock_set_flag(sk, SOCK_DONE);
783	vsk->peer_shutdown = SHUTDOWN_MASK;
784	if (vsock_stream_has_data(vsk) <= 0)
785		sk->sk_state = TCP_CLOSING;
786	sk->sk_state_change(sk);
787
788	if (vsk->close_work_scheduled &&
789	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
790		vsk->close_work_scheduled = false;
791
792		virtio_transport_remove_sock(vsk);
793
794		/* Release refcnt obtained when we scheduled the timeout */
795		sock_put(sk);
796	}
797}
798
799static void virtio_transport_close_timeout(struct work_struct *work)
800{
801	struct vsock_sock *vsk =
802		container_of(work, struct vsock_sock, close_work.work);
803	struct sock *sk = sk_vsock(vsk);
804
805	sock_hold(sk);
806	lock_sock(sk);
807
808	if (!sock_flag(sk, SOCK_DONE)) {
809		(void)virtio_transport_reset(vsk, NULL);
810
811		virtio_transport_do_close(vsk, false);
812	}
813
814	vsk->close_work_scheduled = false;
815
816	release_sock(sk);
817	sock_put(sk);
818}
819
820/* User context, vsk->sk is locked */
821static bool virtio_transport_close(struct vsock_sock *vsk)
822{
823	struct sock *sk = &vsk->sk;
824
825	if (!(sk->sk_state == TCP_ESTABLISHED ||
826	      sk->sk_state == TCP_CLOSING))
827		return true;
828
829	/* Already received SHUTDOWN from peer, reply with RST */
830	if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
831		(void)virtio_transport_reset(vsk, NULL);
832		return true;
833	}
834
835	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
836		(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);
837
838	if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
839		virtio_transport_wait_close(sk, sk->sk_lingertime);
840
841	if (sock_flag(sk, SOCK_DONE)) {
842		return true;
843	}
844
845	sock_hold(sk);
846	INIT_DELAYED_WORK(&vsk->close_work,
847			  virtio_transport_close_timeout);
848	vsk->close_work_scheduled = true;
849	schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
850	return false;
851}
852
853void virtio_transport_release(struct vsock_sock *vsk)
854{
855	struct sock *sk = &vsk->sk;
856	bool remove_sock = true;
857
858	if (sk->sk_type == SOCK_STREAM)
859		remove_sock = virtio_transport_close(vsk);
860
861	if (remove_sock) {
862		sock_set_flag(sk, SOCK_DONE);
863		virtio_transport_remove_sock(vsk);
864	}
865}
866EXPORT_SYMBOL_GPL(virtio_transport_release);
867
868static int
869virtio_transport_recv_connecting(struct sock *sk,
870				 struct virtio_vsock_pkt *pkt)
871{
872	struct vsock_sock *vsk = vsock_sk(sk);
873	int err;
874	int skerr;
875
876	switch (le16_to_cpu(pkt->hdr.op)) {
877	case VIRTIO_VSOCK_OP_RESPONSE:
878		sk->sk_state = TCP_ESTABLISHED;
879		sk->sk_socket->state = SS_CONNECTED;
880		vsock_insert_connected(vsk);
881		sk->sk_state_change(sk);
882		break;
883	case VIRTIO_VSOCK_OP_INVALID:
884		break;
885	case VIRTIO_VSOCK_OP_RST:
886		skerr = ECONNRESET;
887		err = 0;
888		goto destroy;
889	default:
890		skerr = EPROTO;
891		err = -EINVAL;
892		goto destroy;
893	}
894	return 0;
895
896destroy:
897	virtio_transport_reset(vsk, pkt);
898	sk->sk_state = TCP_CLOSE;
899	sk->sk_err = skerr;
900	sk->sk_error_report(sk);
901	return err;
902}
903
904static void
905virtio_transport_recv_enqueue(struct vsock_sock *vsk,
906			      struct virtio_vsock_pkt *pkt)
907{
908	struct virtio_vsock_sock *vvs = vsk->trans;
909	bool can_enqueue, free_pkt = false;
910
911	pkt->len = le32_to_cpu(pkt->hdr.len);
912	pkt->off = 0;
913
914	spin_lock_bh(&vvs->rx_lock);
915
916	can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
917	if (!can_enqueue) {
918		free_pkt = true;
919		goto out;
920	}
921
922	/* Try to copy small packets into the buffer of last packet queued,
923	 * to avoid wasting memory queueing the entire buffer with a small
924	 * payload.
925	 */
926	if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
927		struct virtio_vsock_pkt *last_pkt;
928
929		last_pkt = list_last_entry(&vvs->rx_queue,
930					   struct virtio_vsock_pkt, list);
931
932		/* If there is space in the last packet queued, we copy the
933		 * new packet in its buffer.
934		 */
935		if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
936			memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
937			       pkt->len);
938			last_pkt->len += pkt->len;
939			free_pkt = true;
940			goto out;
941		}
942	}
943
944	list_add_tail(&pkt->list, &vvs->rx_queue);
945
946out:
947	spin_unlock_bh(&vvs->rx_lock);
948	if (free_pkt)
949		virtio_transport_free_pkt(pkt);
950}
951
952static int
953virtio_transport_recv_connected(struct sock *sk,
954				struct virtio_vsock_pkt *pkt)
955{
956	struct vsock_sock *vsk = vsock_sk(sk);
957	int err = 0;
958
959	switch (le16_to_cpu(pkt->hdr.op)) {
960	case VIRTIO_VSOCK_OP_RW:
961		virtio_transport_recv_enqueue(vsk, pkt);
962		sk->sk_data_ready(sk);
963		return err;
964	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
965		sk->sk_write_space(sk);
966		break;
967	case VIRTIO_VSOCK_OP_SHUTDOWN:
968		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
969			vsk->peer_shutdown |= RCV_SHUTDOWN;
970		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
971			vsk->peer_shutdown |= SEND_SHUTDOWN;
972		if (vsk->peer_shutdown == SHUTDOWN_MASK &&
973		    vsock_stream_has_data(vsk) <= 0 &&
974		    !sock_flag(sk, SOCK_DONE)) {
975			(void)virtio_transport_reset(vsk, NULL);
976
977			virtio_transport_do_close(vsk, true);
978		}
979		if (le32_to_cpu(pkt->hdr.flags))
980			sk->sk_state_change(sk);
981		break;
982	case VIRTIO_VSOCK_OP_RST:
983		virtio_transport_do_close(vsk, true);
984		break;
985	default:
986		err = -EINVAL;
987		break;
988	}
989
990	virtio_transport_free_pkt(pkt);
991	return err;
992}
993
994static void
995virtio_transport_recv_disconnecting(struct sock *sk,
996				    struct virtio_vsock_pkt *pkt)
997{
998	struct vsock_sock *vsk = vsock_sk(sk);
999
1000	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
1001		virtio_transport_do_close(vsk, true);
1002}
1003
1004static int
1005virtio_transport_send_response(struct vsock_sock *vsk,
1006			       struct virtio_vsock_pkt *pkt)
1007{
1008	struct virtio_vsock_pkt_info info = {
1009		.op = VIRTIO_VSOCK_OP_RESPONSE,
1010		.type = VIRTIO_VSOCK_TYPE_STREAM,
1011		.remote_cid = le64_to_cpu(pkt->hdr.src_cid),
1012		.remote_port = le32_to_cpu(pkt->hdr.src_port),
1013		.reply = true,
1014		.vsk = vsk,
1015	};
1016
1017	return virtio_transport_send_pkt_info(vsk, &info);
1018}
1019
1020static bool virtio_transport_space_update(struct sock *sk,
1021					  struct virtio_vsock_pkt *pkt)
1022{
1023	struct vsock_sock *vsk = vsock_sk(sk);
1024	struct virtio_vsock_sock *vvs = vsk->trans;
1025	bool space_available;
1026
1027	/* Listener sockets are not associated with any transport, so we are
1028	 * not able to take the state to see if there is space available in the
1029	 * remote peer, but since they are only used to receive requests, we
1030	 * can assume that there is always space available in the other peer.
1031	 */
1032	if (!vvs)
1033		return true;
1034
1035	/* buf_alloc and fwd_cnt is always included in the hdr */
1036	spin_lock_bh(&vvs->tx_lock);
1037	vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
1038	vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
1039	space_available = virtio_transport_has_space(vsk);
1040	spin_unlock_bh(&vvs->tx_lock);
1041	return space_available;
1042}
1043
1044/* Handle server socket */
1045static int
1046virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
1047			     struct virtio_transport *t)
1048{
1049	struct vsock_sock *vsk = vsock_sk(sk);
1050	struct vsock_sock *vchild;
1051	struct sock *child;
1052	int ret;
1053
1054	if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
1055		virtio_transport_reset_no_sock(t, pkt);
1056		return -EINVAL;
1057	}
1058
1059	if (sk_acceptq_is_full(sk)) {
1060		virtio_transport_reset_no_sock(t, pkt);
1061		return -ENOMEM;
1062	}
1063
1064	child = vsock_create_connected(sk);
1065	if (!child) {
1066		virtio_transport_reset_no_sock(t, pkt);
1067		return -ENOMEM;
1068	}
1069
1070	sk_acceptq_added(sk);
1071
1072	lock_sock_nested(child, SINGLE_DEPTH_NESTING);
1073
1074	child->sk_state = TCP_ESTABLISHED;
1075
1076	vchild = vsock_sk(child);
1077	vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
1078			le32_to_cpu(pkt->hdr.dst_port));
1079	vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
1080			le32_to_cpu(pkt->hdr.src_port));
1081
1082	ret = vsock_assign_transport(vchild, vsk);
1083	/* Transport assigned (looking at remote_addr) must be the same
1084	 * where we received the request.
1085	 */
1086	if (ret || vchild->transport != &t->transport) {
1087		release_sock(child);
1088		virtio_transport_reset_no_sock(t, pkt);
1089		sock_put(child);
1090		return ret;
1091	}
1092
1093	if (virtio_transport_space_update(child, pkt))
1094		child->sk_write_space(child);
1095
1096	vsock_insert_connected(vchild);
1097	vsock_enqueue_accept(sk, child);
1098	virtio_transport_send_response(vchild, pkt);
1099
1100	release_sock(child);
1101
1102	sk->sk_data_ready(sk);
1103	return 0;
1104}
1105
1106/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
1107 * lock.
1108 */
1109void virtio_transport_recv_pkt(struct virtio_transport *t,
1110			       struct virtio_vsock_pkt *pkt)
1111{
1112	struct sockaddr_vm src, dst;
1113	struct vsock_sock *vsk;
1114	struct sock *sk;
1115	bool space_available;
1116
1117	vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
1118			le32_to_cpu(pkt->hdr.src_port));
1119	vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
1120			le32_to_cpu(pkt->hdr.dst_port));
1121
1122	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
1123					dst.svm_cid, dst.svm_port,
1124					le32_to_cpu(pkt->hdr.len),
1125					le16_to_cpu(pkt->hdr.type),
1126					le16_to_cpu(pkt->hdr.op),
1127					le32_to_cpu(pkt->hdr.flags),
1128					le32_to_cpu(pkt->hdr.buf_alloc),
1129					le32_to_cpu(pkt->hdr.fwd_cnt));
1130
1131	if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) {
1132		(void)virtio_transport_reset_no_sock(t, pkt);
1133		goto free_pkt;
1134	}
1135
1136	/* The socket must be in connected or bound table
1137	 * otherwise send reset back
1138	 */
1139	sk = vsock_find_connected_socket(&src, &dst);
1140	if (!sk) {
1141		sk = vsock_find_bound_socket(&dst);
1142		if (!sk) {
1143			(void)virtio_transport_reset_no_sock(t, pkt);
1144			goto free_pkt;
1145		}
1146	}
1147
1148	vsk = vsock_sk(sk);
1149
1150	lock_sock(sk);
1151
1152	/* Check if sk has been closed before lock_sock */
1153	if (sock_flag(sk, SOCK_DONE)) {
1154		(void)virtio_transport_reset_no_sock(t, pkt);
1155		release_sock(sk);
1156		sock_put(sk);
1157		goto free_pkt;
1158	}
1159
1160	space_available = virtio_transport_space_update(sk, pkt);
1161
1162	/* Update CID in case it has changed after a transport reset event */
1163	if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
1164		vsk->local_addr.svm_cid = dst.svm_cid;
1165
1166	if (space_available)
1167		sk->sk_write_space(sk);
1168
1169	switch (sk->sk_state) {
1170	case TCP_LISTEN:
1171		virtio_transport_recv_listen(sk, pkt, t);
1172		virtio_transport_free_pkt(pkt);
1173		break;
1174	case TCP_SYN_SENT:
1175		virtio_transport_recv_connecting(sk, pkt);
1176		virtio_transport_free_pkt(pkt);
1177		break;
1178	case TCP_ESTABLISHED:
1179		virtio_transport_recv_connected(sk, pkt);
1180		break;
1181	case TCP_CLOSING:
1182		virtio_transport_recv_disconnecting(sk, pkt);
1183		virtio_transport_free_pkt(pkt);
1184		break;
1185	default:
1186		(void)virtio_transport_reset_no_sock(t, pkt);
1187		virtio_transport_free_pkt(pkt);
1188		break;
1189	}
1190
1191	release_sock(sk);
1192
1193	/* Release refcnt obtained when we fetched this socket out of the
1194	 * bound or connected list.
1195	 */
1196	sock_put(sk);
1197	return;
1198
1199free_pkt:
1200	virtio_transport_free_pkt(pkt);
1201}
1202EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
1203
1204void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
1205{
1206	kvfree(pkt->buf);
1207	kfree(pkt);
1208}
1209EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
1210
1211MODULE_LICENSE("GPL v2");
1212MODULE_AUTHOR("Asias He");
1213MODULE_DESCRIPTION("common code for virtio vsock");
1214