1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * common code for virtio vsock 4 * 5 * Copyright (C) 2013-2015 Red Hat, Inc. 6 * Author: Asias He <asias@redhat.com> 7 * Stefan Hajnoczi <stefanha@redhat.com> 8 */ 9#include <linux/spinlock.h> 10#include <linux/module.h> 11#include <linux/sched/signal.h> 12#include <linux/ctype.h> 13#include <linux/list.h> 14#include <linux/virtio_vsock.h> 15#include <uapi/linux/vsockmon.h> 16 17#include <net/sock.h> 18#include <net/af_vsock.h> 19 20#define CREATE_TRACE_POINTS 21#include <trace/events/vsock_virtio_transport_common.h> 22 23/* How long to wait for graceful shutdown of a connection */ 24#define VSOCK_CLOSE_TIMEOUT (8 * HZ) 25 26/* Threshold for detecting small packets to copy */ 27#define GOOD_COPY_LEN 128 28 29static const struct virtio_transport * 30virtio_transport_get_ops(struct vsock_sock *vsk) 31{ 32 const struct vsock_transport *t = vsock_core_get_transport(vsk); 33 34 if (WARN_ON(!t)) 35 return NULL; 36 37 return container_of(t, struct virtio_transport, transport); 38} 39 40static struct virtio_vsock_pkt * 41virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, 42 size_t len, 43 u32 src_cid, 44 u32 src_port, 45 u32 dst_cid, 46 u32 dst_port) 47{ 48 struct virtio_vsock_pkt *pkt; 49 int err; 50 51 pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); 52 if (!pkt) 53 return NULL; 54 55 pkt->hdr.type = cpu_to_le16(info->type); 56 pkt->hdr.op = cpu_to_le16(info->op); 57 pkt->hdr.src_cid = cpu_to_le64(src_cid); 58 pkt->hdr.dst_cid = cpu_to_le64(dst_cid); 59 pkt->hdr.src_port = cpu_to_le32(src_port); 60 pkt->hdr.dst_port = cpu_to_le32(dst_port); 61 pkt->hdr.flags = cpu_to_le32(info->flags); 62 pkt->len = len; 63 pkt->hdr.len = cpu_to_le32(len); 64 pkt->reply = info->reply; 65 pkt->vsk = info->vsk; 66 67 if (info->msg && len > 0) { 68 pkt->buf = kmalloc(len, GFP_KERNEL); 69 if (!pkt->buf) 70 goto out_pkt; 71 72 pkt->buf_len = len; 73 74 err = memcpy_from_msg(pkt->buf, info->msg, len); 75 if (err) 76 goto out; 77 } 78 79 trace_virtio_transport_alloc_pkt(src_cid, src_port, 80 dst_cid, dst_port, 81 len, 82 info->type, 83 info->op, 84 info->flags); 85 86 return pkt; 87 88out: 89 kfree(pkt->buf); 90out_pkt: 91 kfree(pkt); 92 return NULL; 93} 94 95/* Packet capture */ 96static struct sk_buff *virtio_transport_build_skb(void *opaque) 97{ 98 struct virtio_vsock_pkt *pkt = opaque; 99 struct af_vsockmon_hdr *hdr; 100 struct sk_buff *skb; 101 size_t payload_len; 102 void *payload_buf; 103 104 /* A packet could be split to fit the RX buffer, so we can retrieve 105 * the payload length from the header and the buffer pointer taking 106 * care of the offset in the original packet. 107 */ 108 payload_len = le32_to_cpu(pkt->hdr.len); 109 payload_buf = pkt->buf + pkt->off; 110 111 skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len, 112 GFP_ATOMIC); 113 if (!skb) 114 return NULL; 115 116 hdr = skb_put(skb, sizeof(*hdr)); 117 118 /* pkt->hdr is little-endian so no need to byteswap here */ 119 hdr->src_cid = pkt->hdr.src_cid; 120 hdr->src_port = pkt->hdr.src_port; 121 hdr->dst_cid = pkt->hdr.dst_cid; 122 hdr->dst_port = pkt->hdr.dst_port; 123 124 hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO); 125 hdr->len = cpu_to_le16(sizeof(pkt->hdr)); 126 memset(hdr->reserved, 0, sizeof(hdr->reserved)); 127 128 switch (le16_to_cpu(pkt->hdr.op)) { 129 case VIRTIO_VSOCK_OP_REQUEST: 130 case VIRTIO_VSOCK_OP_RESPONSE: 131 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT); 132 break; 133 case VIRTIO_VSOCK_OP_RST: 134 case VIRTIO_VSOCK_OP_SHUTDOWN: 135 hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT); 136 break; 137 case VIRTIO_VSOCK_OP_RW: 138 hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD); 139 break; 140 case VIRTIO_VSOCK_OP_CREDIT_UPDATE: 141 case VIRTIO_VSOCK_OP_CREDIT_REQUEST: 142 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL); 143 break; 144 default: 145 hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN); 146 break; 147 } 148 149 skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); 150 151 if (payload_len) { 152 skb_put_data(skb, payload_buf, payload_len); 153 } 154 155 return skb; 156} 157 158void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt) 159{ 160 if (pkt->tap_delivered) 161 return; 162 163 vsock_deliver_tap(virtio_transport_build_skb, pkt); 164 pkt->tap_delivered = true; 165} 166EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); 167 168/* This function can only be used on connecting/connected sockets, 169 * since a socket assigned to a transport is required. 170 * 171 * Do not use on listener sockets! 172 */ 173static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, 174 struct virtio_vsock_pkt_info *info) 175{ 176 u32 src_cid, src_port, dst_cid, dst_port; 177 const struct virtio_transport *t_ops; 178 struct virtio_vsock_sock *vvs; 179 struct virtio_vsock_pkt *pkt; 180 u32 pkt_len = info->pkt_len; 181 182 t_ops = virtio_transport_get_ops(vsk); 183 if (unlikely(!t_ops)) 184 return -EFAULT; 185 186 src_cid = t_ops->transport.get_local_cid(); 187 src_port = vsk->local_addr.svm_port; 188 if (!info->remote_cid) { 189 dst_cid = vsk->remote_addr.svm_cid; 190 dst_port = vsk->remote_addr.svm_port; 191 } else { 192 dst_cid = info->remote_cid; 193 dst_port = info->remote_port; 194 } 195 196 vvs = vsk->trans; 197 198 /* we can send less than pkt_len bytes */ 199 if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) 200 pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; 201 202 /* virtio_transport_get_credit might return less than pkt_len credit */ 203 pkt_len = virtio_transport_get_credit(vvs, pkt_len); 204 205 /* Do not send zero length OP_RW pkt */ 206 if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) 207 return pkt_len; 208 209 pkt = virtio_transport_alloc_pkt(info, pkt_len, 210 src_cid, src_port, 211 dst_cid, dst_port); 212 if (!pkt) { 213 virtio_transport_put_credit(vvs, pkt_len); 214 return -ENOMEM; 215 } 216 217 virtio_transport_inc_tx_pkt(vvs, pkt); 218 219 return t_ops->send_pkt(pkt); 220} 221 222static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, 223 struct virtio_vsock_pkt *pkt) 224{ 225 if (vvs->rx_bytes + pkt->len > vvs->buf_alloc) 226 return false; 227 228 vvs->rx_bytes += pkt->len; 229 return true; 230} 231 232static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, 233 struct virtio_vsock_pkt *pkt) 234{ 235 vvs->rx_bytes -= pkt->len; 236 vvs->fwd_cnt += pkt->len; 237} 238 239void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) 240{ 241 spin_lock_bh(&vvs->rx_lock); 242 vvs->last_fwd_cnt = vvs->fwd_cnt; 243 pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); 244 pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); 245 spin_unlock_bh(&vvs->rx_lock); 246} 247EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); 248 249u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit) 250{ 251 u32 ret; 252 253 spin_lock_bh(&vvs->tx_lock); 254 ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); 255 if (ret > credit) 256 ret = credit; 257 vvs->tx_cnt += ret; 258 spin_unlock_bh(&vvs->tx_lock); 259 260 return ret; 261} 262EXPORT_SYMBOL_GPL(virtio_transport_get_credit); 263 264void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit) 265{ 266 spin_lock_bh(&vvs->tx_lock); 267 vvs->tx_cnt -= credit; 268 spin_unlock_bh(&vvs->tx_lock); 269} 270EXPORT_SYMBOL_GPL(virtio_transport_put_credit); 271 272static int virtio_transport_send_credit_update(struct vsock_sock *vsk, 273 int type, 274 struct virtio_vsock_hdr *hdr) 275{ 276 struct virtio_vsock_pkt_info info = { 277 .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, 278 .type = type, 279 .vsk = vsk, 280 }; 281 282 return virtio_transport_send_pkt_info(vsk, &info); 283} 284 285static ssize_t 286virtio_transport_stream_do_peek(struct vsock_sock *vsk, 287 struct msghdr *msg, 288 size_t len) 289{ 290 struct virtio_vsock_sock *vvs = vsk->trans; 291 struct virtio_vsock_pkt *pkt; 292 size_t bytes, total = 0, off; 293 int err = -EFAULT; 294 295 spin_lock_bh(&vvs->rx_lock); 296 297 list_for_each_entry(pkt, &vvs->rx_queue, list) { 298 off = pkt->off; 299 300 if (total == len) 301 break; 302 303 while (total < len && off < pkt->len) { 304 bytes = len - total; 305 if (bytes > pkt->len - off) 306 bytes = pkt->len - off; 307 308 /* sk_lock is held by caller so no one else can dequeue. 309 * Unlock rx_lock since memcpy_to_msg() may sleep. 310 */ 311 spin_unlock_bh(&vvs->rx_lock); 312 313 err = memcpy_to_msg(msg, pkt->buf + off, bytes); 314 if (err) 315 goto out; 316 317 spin_lock_bh(&vvs->rx_lock); 318 319 total += bytes; 320 off += bytes; 321 } 322 } 323 324 spin_unlock_bh(&vvs->rx_lock); 325 326 return total; 327 328out: 329 if (total) 330 err = total; 331 return err; 332} 333 334static ssize_t 335virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, 336 struct msghdr *msg, 337 size_t len) 338{ 339 struct virtio_vsock_sock *vvs = vsk->trans; 340 struct virtio_vsock_pkt *pkt; 341 size_t bytes, total = 0; 342 u32 free_space; 343 u32 fwd_cnt_delta; 344 bool low_rx_bytes; 345 int err = -EFAULT; 346 347 spin_lock_bh(&vvs->rx_lock); 348 while (total < len && !list_empty(&vvs->rx_queue)) { 349 pkt = list_first_entry(&vvs->rx_queue, 350 struct virtio_vsock_pkt, list); 351 352 bytes = len - total; 353 if (bytes > pkt->len - pkt->off) 354 bytes = pkt->len - pkt->off; 355 356 /* sk_lock is held by caller so no one else can dequeue. 357 * Unlock rx_lock since memcpy_to_msg() may sleep. 358 */ 359 spin_unlock_bh(&vvs->rx_lock); 360 361 err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); 362 if (err) 363 goto out; 364 365 spin_lock_bh(&vvs->rx_lock); 366 367 total += bytes; 368 pkt->off += bytes; 369 if (pkt->off == pkt->len) { 370 virtio_transport_dec_rx_pkt(vvs, pkt); 371 list_del(&pkt->list); 372 virtio_transport_free_pkt(pkt); 373 } 374 } 375 376 fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt; 377 free_space = vvs->buf_alloc - fwd_cnt_delta; 378 low_rx_bytes = (vvs->rx_bytes < 379 sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX)); 380 381 spin_unlock_bh(&vvs->rx_lock); 382 383 /* To reduce the number of credit update messages, 384 * don't update credits as long as lots of space is available. 385 * Note: the limit chosen here is arbitrary. Setting the limit 386 * too high causes extra messages. Too low causes transmitter 387 * stalls. As stalls are in theory more expensive than extra 388 * messages, we set the limit to a high value. TODO: experiment 389 * with different values. Also send credit update message when 390 * number of bytes in rx queue is not enough to wake up reader. 391 */ 392 if (fwd_cnt_delta && 393 (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes)) { 394 virtio_transport_send_credit_update(vsk, 395 VIRTIO_VSOCK_TYPE_STREAM, 396 NULL); 397 } 398 399 return total; 400 401out: 402 if (total) 403 err = total; 404 return err; 405} 406 407ssize_t 408virtio_transport_stream_dequeue(struct vsock_sock *vsk, 409 struct msghdr *msg, 410 size_t len, int flags) 411{ 412 if (flags & MSG_PEEK) 413 return virtio_transport_stream_do_peek(vsk, msg, len); 414 else 415 return virtio_transport_stream_do_dequeue(vsk, msg, len); 416} 417EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); 418 419int 420virtio_transport_dgram_dequeue(struct vsock_sock *vsk, 421 struct msghdr *msg, 422 size_t len, int flags) 423{ 424 return -EOPNOTSUPP; 425} 426EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); 427 428s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) 429{ 430 struct virtio_vsock_sock *vvs = vsk->trans; 431 s64 bytes; 432 433 spin_lock_bh(&vvs->rx_lock); 434 bytes = vvs->rx_bytes; 435 spin_unlock_bh(&vvs->rx_lock); 436 437 return bytes; 438} 439EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); 440 441static s64 virtio_transport_has_space(struct vsock_sock *vsk) 442{ 443 struct virtio_vsock_sock *vvs = vsk->trans; 444 s64 bytes; 445 446 bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt); 447 if (bytes < 0) 448 bytes = 0; 449 450 return bytes; 451} 452 453s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) 454{ 455 struct virtio_vsock_sock *vvs = vsk->trans; 456 s64 bytes; 457 458 spin_lock_bh(&vvs->tx_lock); 459 bytes = virtio_transport_has_space(vsk); 460 spin_unlock_bh(&vvs->tx_lock); 461 462 return bytes; 463} 464EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); 465 466int virtio_transport_do_socket_init(struct vsock_sock *vsk, 467 struct vsock_sock *psk) 468{ 469 struct virtio_vsock_sock *vvs; 470 471 vvs = kzalloc(sizeof(*vvs), GFP_KERNEL); 472 if (!vvs) 473 return -ENOMEM; 474 475 vsk->trans = vvs; 476 vvs->vsk = vsk; 477 if (psk && psk->trans) { 478 struct virtio_vsock_sock *ptrans = psk->trans; 479 480 vvs->peer_buf_alloc = ptrans->peer_buf_alloc; 481 } 482 483 if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE) 484 vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE; 485 486 vvs->buf_alloc = vsk->buffer_size; 487 488 spin_lock_init(&vvs->rx_lock); 489 spin_lock_init(&vvs->tx_lock); 490 INIT_LIST_HEAD(&vvs->rx_queue); 491 492 return 0; 493} 494EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); 495 496/* sk_lock held by the caller */ 497void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val) 498{ 499 struct virtio_vsock_sock *vvs = vsk->trans; 500 501 if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE) 502 *val = VIRTIO_VSOCK_MAX_BUF_SIZE; 503 504 vvs->buf_alloc = *val; 505 506 virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, 507 NULL); 508} 509EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size); 510 511int 512virtio_transport_notify_poll_in(struct vsock_sock *vsk, 513 size_t target, 514 bool *data_ready_now) 515{ 516 if (vsock_stream_has_data(vsk)) 517 *data_ready_now = true; 518 else 519 *data_ready_now = false; 520 521 return 0; 522} 523EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); 524 525int 526virtio_transport_notify_poll_out(struct vsock_sock *vsk, 527 size_t target, 528 bool *space_avail_now) 529{ 530 s64 free_space; 531 532 free_space = vsock_stream_has_space(vsk); 533 if (free_space > 0) 534 *space_avail_now = true; 535 else if (free_space == 0) 536 *space_avail_now = false; 537 538 return 0; 539} 540EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); 541 542int virtio_transport_notify_recv_init(struct vsock_sock *vsk, 543 size_t target, struct vsock_transport_recv_notify_data *data) 544{ 545 return 0; 546} 547EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); 548 549int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, 550 size_t target, struct vsock_transport_recv_notify_data *data) 551{ 552 return 0; 553} 554EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); 555 556int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, 557 size_t target, struct vsock_transport_recv_notify_data *data) 558{ 559 return 0; 560} 561EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); 562 563int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, 564 size_t target, ssize_t copied, bool data_read, 565 struct vsock_transport_recv_notify_data *data) 566{ 567 return 0; 568} 569EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); 570 571int virtio_transport_notify_send_init(struct vsock_sock *vsk, 572 struct vsock_transport_send_notify_data *data) 573{ 574 return 0; 575} 576EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); 577 578int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, 579 struct vsock_transport_send_notify_data *data) 580{ 581 return 0; 582} 583EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); 584 585int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, 586 struct vsock_transport_send_notify_data *data) 587{ 588 return 0; 589} 590EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); 591 592int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, 593 ssize_t written, struct vsock_transport_send_notify_data *data) 594{ 595 return 0; 596} 597EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); 598 599u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) 600{ 601 return vsk->buffer_size; 602} 603EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); 604 605bool virtio_transport_stream_is_active(struct vsock_sock *vsk) 606{ 607 return true; 608} 609EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); 610 611bool virtio_transport_stream_allow(u32 cid, u32 port) 612{ 613 return true; 614} 615EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); 616 617int virtio_transport_dgram_bind(struct vsock_sock *vsk, 618 struct sockaddr_vm *addr) 619{ 620 return -EOPNOTSUPP; 621} 622EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); 623 624bool virtio_transport_dgram_allow(u32 cid, u32 port) 625{ 626 return false; 627} 628EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); 629 630int virtio_transport_connect(struct vsock_sock *vsk) 631{ 632 struct virtio_vsock_pkt_info info = { 633 .op = VIRTIO_VSOCK_OP_REQUEST, 634 .type = VIRTIO_VSOCK_TYPE_STREAM, 635 .vsk = vsk, 636 }; 637 638 return virtio_transport_send_pkt_info(vsk, &info); 639} 640EXPORT_SYMBOL_GPL(virtio_transport_connect); 641 642int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) 643{ 644 struct virtio_vsock_pkt_info info = { 645 .op = VIRTIO_VSOCK_OP_SHUTDOWN, 646 .type = VIRTIO_VSOCK_TYPE_STREAM, 647 .flags = (mode & RCV_SHUTDOWN ? 648 VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | 649 (mode & SEND_SHUTDOWN ? 650 VIRTIO_VSOCK_SHUTDOWN_SEND : 0), 651 .vsk = vsk, 652 }; 653 654 return virtio_transport_send_pkt_info(vsk, &info); 655} 656EXPORT_SYMBOL_GPL(virtio_transport_shutdown); 657 658int 659virtio_transport_dgram_enqueue(struct vsock_sock *vsk, 660 struct sockaddr_vm *remote_addr, 661 struct msghdr *msg, 662 size_t dgram_len) 663{ 664 return -EOPNOTSUPP; 665} 666EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); 667 668ssize_t 669virtio_transport_stream_enqueue(struct vsock_sock *vsk, 670 struct msghdr *msg, 671 size_t len) 672{ 673 struct virtio_vsock_pkt_info info = { 674 .op = VIRTIO_VSOCK_OP_RW, 675 .type = VIRTIO_VSOCK_TYPE_STREAM, 676 .msg = msg, 677 .pkt_len = len, 678 .vsk = vsk, 679 }; 680 681 return virtio_transport_send_pkt_info(vsk, &info); 682} 683EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); 684 685void virtio_transport_destruct(struct vsock_sock *vsk) 686{ 687 struct virtio_vsock_sock *vvs = vsk->trans; 688 689 kfree(vvs); 690} 691EXPORT_SYMBOL_GPL(virtio_transport_destruct); 692 693static int virtio_transport_reset(struct vsock_sock *vsk, 694 struct virtio_vsock_pkt *pkt) 695{ 696 struct virtio_vsock_pkt_info info = { 697 .op = VIRTIO_VSOCK_OP_RST, 698 .type = VIRTIO_VSOCK_TYPE_STREAM, 699 .reply = !!pkt, 700 .vsk = vsk, 701 }; 702 703 /* Send RST only if the original pkt is not a RST pkt */ 704 if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) 705 return 0; 706 707 return virtio_transport_send_pkt_info(vsk, &info); 708} 709 710/* Normally packets are associated with a socket. There may be no socket if an 711 * attempt was made to connect to a socket that does not exist. 712 */ 713static int virtio_transport_reset_no_sock(const struct virtio_transport *t, 714 struct virtio_vsock_pkt *pkt) 715{ 716 struct virtio_vsock_pkt *reply; 717 struct virtio_vsock_pkt_info info = { 718 .op = VIRTIO_VSOCK_OP_RST, 719 .type = le16_to_cpu(pkt->hdr.type), 720 .reply = true, 721 }; 722 723 /* Send RST only if the original pkt is not a RST pkt */ 724 if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) 725 return 0; 726 727 reply = virtio_transport_alloc_pkt(&info, 0, 728 le64_to_cpu(pkt->hdr.dst_cid), 729 le32_to_cpu(pkt->hdr.dst_port), 730 le64_to_cpu(pkt->hdr.src_cid), 731 le32_to_cpu(pkt->hdr.src_port)); 732 if (!reply) 733 return -ENOMEM; 734 735 if (!t) { 736 virtio_transport_free_pkt(reply); 737 return -ENOTCONN; 738 } 739 740 return t->send_pkt(reply); 741} 742 743/* This function should be called with sk_lock held and SOCK_DONE set */ 744static void virtio_transport_remove_sock(struct vsock_sock *vsk) 745{ 746 struct virtio_vsock_sock *vvs = vsk->trans; 747 struct virtio_vsock_pkt *pkt, *tmp; 748 749 /* We don't need to take rx_lock, as the socket is closing and we are 750 * removing it. 751 */ 752 list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) { 753 list_del(&pkt->list); 754 virtio_transport_free_pkt(pkt); 755 } 756 757 vsock_remove_sock(vsk); 758} 759 760static void virtio_transport_wait_close(struct sock *sk, long timeout) 761{ 762 if (timeout) { 763 DEFINE_WAIT_FUNC(wait, woken_wake_function); 764 765 add_wait_queue(sk_sleep(sk), &wait); 766 767 do { 768 if (sk_wait_event(sk, &timeout, 769 sock_flag(sk, SOCK_DONE), &wait)) 770 break; 771 } while (!signal_pending(current) && timeout); 772 773 remove_wait_queue(sk_sleep(sk), &wait); 774 } 775} 776 777static void virtio_transport_do_close(struct vsock_sock *vsk, 778 bool cancel_timeout) 779{ 780 struct sock *sk = sk_vsock(vsk); 781 782 sock_set_flag(sk, SOCK_DONE); 783 vsk->peer_shutdown = SHUTDOWN_MASK; 784 if (vsock_stream_has_data(vsk) <= 0) 785 sk->sk_state = TCP_CLOSING; 786 sk->sk_state_change(sk); 787 788 if (vsk->close_work_scheduled && 789 (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { 790 vsk->close_work_scheduled = false; 791 792 virtio_transport_remove_sock(vsk); 793 794 /* Release refcnt obtained when we scheduled the timeout */ 795 sock_put(sk); 796 } 797} 798 799static void virtio_transport_close_timeout(struct work_struct *work) 800{ 801 struct vsock_sock *vsk = 802 container_of(work, struct vsock_sock, close_work.work); 803 struct sock *sk = sk_vsock(vsk); 804 805 sock_hold(sk); 806 lock_sock(sk); 807 808 if (!sock_flag(sk, SOCK_DONE)) { 809 (void)virtio_transport_reset(vsk, NULL); 810 811 virtio_transport_do_close(vsk, false); 812 } 813 814 vsk->close_work_scheduled = false; 815 816 release_sock(sk); 817 sock_put(sk); 818} 819 820/* User context, vsk->sk is locked */ 821static bool virtio_transport_close(struct vsock_sock *vsk) 822{ 823 struct sock *sk = &vsk->sk; 824 825 if (!(sk->sk_state == TCP_ESTABLISHED || 826 sk->sk_state == TCP_CLOSING)) 827 return true; 828 829 /* Already received SHUTDOWN from peer, reply with RST */ 830 if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) { 831 (void)virtio_transport_reset(vsk, NULL); 832 return true; 833 } 834 835 if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) 836 (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK); 837 838 if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING)) 839 virtio_transport_wait_close(sk, sk->sk_lingertime); 840 841 if (sock_flag(sk, SOCK_DONE)) { 842 return true; 843 } 844 845 sock_hold(sk); 846 INIT_DELAYED_WORK(&vsk->close_work, 847 virtio_transport_close_timeout); 848 vsk->close_work_scheduled = true; 849 schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT); 850 return false; 851} 852 853void virtio_transport_release(struct vsock_sock *vsk) 854{ 855 struct sock *sk = &vsk->sk; 856 bool remove_sock = true; 857 858 if (sk->sk_type == SOCK_STREAM) 859 remove_sock = virtio_transport_close(vsk); 860 861 if (remove_sock) { 862 sock_set_flag(sk, SOCK_DONE); 863 virtio_transport_remove_sock(vsk); 864 } 865} 866EXPORT_SYMBOL_GPL(virtio_transport_release); 867 868static int 869virtio_transport_recv_connecting(struct sock *sk, 870 struct virtio_vsock_pkt *pkt) 871{ 872 struct vsock_sock *vsk = vsock_sk(sk); 873 int err; 874 int skerr; 875 876 switch (le16_to_cpu(pkt->hdr.op)) { 877 case VIRTIO_VSOCK_OP_RESPONSE: 878 sk->sk_state = TCP_ESTABLISHED; 879 sk->sk_socket->state = SS_CONNECTED; 880 vsock_insert_connected(vsk); 881 sk->sk_state_change(sk); 882 break; 883 case VIRTIO_VSOCK_OP_INVALID: 884 break; 885 case VIRTIO_VSOCK_OP_RST: 886 skerr = ECONNRESET; 887 err = 0; 888 goto destroy; 889 default: 890 skerr = EPROTO; 891 err = -EINVAL; 892 goto destroy; 893 } 894 return 0; 895 896destroy: 897 virtio_transport_reset(vsk, pkt); 898 sk->sk_state = TCP_CLOSE; 899 sk->sk_err = skerr; 900 sk->sk_error_report(sk); 901 return err; 902} 903 904static void 905virtio_transport_recv_enqueue(struct vsock_sock *vsk, 906 struct virtio_vsock_pkt *pkt) 907{ 908 struct virtio_vsock_sock *vvs = vsk->trans; 909 bool can_enqueue, free_pkt = false; 910 911 pkt->len = le32_to_cpu(pkt->hdr.len); 912 pkt->off = 0; 913 914 spin_lock_bh(&vvs->rx_lock); 915 916 can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt); 917 if (!can_enqueue) { 918 free_pkt = true; 919 goto out; 920 } 921 922 /* Try to copy small packets into the buffer of last packet queued, 923 * to avoid wasting memory queueing the entire buffer with a small 924 * payload. 925 */ 926 if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) { 927 struct virtio_vsock_pkt *last_pkt; 928 929 last_pkt = list_last_entry(&vvs->rx_queue, 930 struct virtio_vsock_pkt, list); 931 932 /* If there is space in the last packet queued, we copy the 933 * new packet in its buffer. 934 */ 935 if (pkt->len <= last_pkt->buf_len - last_pkt->len) { 936 memcpy(last_pkt->buf + last_pkt->len, pkt->buf, 937 pkt->len); 938 last_pkt->len += pkt->len; 939 free_pkt = true; 940 goto out; 941 } 942 } 943 944 list_add_tail(&pkt->list, &vvs->rx_queue); 945 946out: 947 spin_unlock_bh(&vvs->rx_lock); 948 if (free_pkt) 949 virtio_transport_free_pkt(pkt); 950} 951 952static int 953virtio_transport_recv_connected(struct sock *sk, 954 struct virtio_vsock_pkt *pkt) 955{ 956 struct vsock_sock *vsk = vsock_sk(sk); 957 int err = 0; 958 959 switch (le16_to_cpu(pkt->hdr.op)) { 960 case VIRTIO_VSOCK_OP_RW: 961 virtio_transport_recv_enqueue(vsk, pkt); 962 sk->sk_data_ready(sk); 963 return err; 964 case VIRTIO_VSOCK_OP_CREDIT_UPDATE: 965 sk->sk_write_space(sk); 966 break; 967 case VIRTIO_VSOCK_OP_SHUTDOWN: 968 if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) 969 vsk->peer_shutdown |= RCV_SHUTDOWN; 970 if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) 971 vsk->peer_shutdown |= SEND_SHUTDOWN; 972 if (vsk->peer_shutdown == SHUTDOWN_MASK && 973 vsock_stream_has_data(vsk) <= 0 && 974 !sock_flag(sk, SOCK_DONE)) { 975 (void)virtio_transport_reset(vsk, NULL); 976 977 virtio_transport_do_close(vsk, true); 978 } 979 if (le32_to_cpu(pkt->hdr.flags)) 980 sk->sk_state_change(sk); 981 break; 982 case VIRTIO_VSOCK_OP_RST: 983 virtio_transport_do_close(vsk, true); 984 break; 985 default: 986 err = -EINVAL; 987 break; 988 } 989 990 virtio_transport_free_pkt(pkt); 991 return err; 992} 993 994static void 995virtio_transport_recv_disconnecting(struct sock *sk, 996 struct virtio_vsock_pkt *pkt) 997{ 998 struct vsock_sock *vsk = vsock_sk(sk); 999 1000 if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) 1001 virtio_transport_do_close(vsk, true); 1002} 1003 1004static int 1005virtio_transport_send_response(struct vsock_sock *vsk, 1006 struct virtio_vsock_pkt *pkt) 1007{ 1008 struct virtio_vsock_pkt_info info = { 1009 .op = VIRTIO_VSOCK_OP_RESPONSE, 1010 .type = VIRTIO_VSOCK_TYPE_STREAM, 1011 .remote_cid = le64_to_cpu(pkt->hdr.src_cid), 1012 .remote_port = le32_to_cpu(pkt->hdr.src_port), 1013 .reply = true, 1014 .vsk = vsk, 1015 }; 1016 1017 return virtio_transport_send_pkt_info(vsk, &info); 1018} 1019 1020static bool virtio_transport_space_update(struct sock *sk, 1021 struct virtio_vsock_pkt *pkt) 1022{ 1023 struct vsock_sock *vsk = vsock_sk(sk); 1024 struct virtio_vsock_sock *vvs = vsk->trans; 1025 bool space_available; 1026 1027 /* Listener sockets are not associated with any transport, so we are 1028 * not able to take the state to see if there is space available in the 1029 * remote peer, but since they are only used to receive requests, we 1030 * can assume that there is always space available in the other peer. 1031 */ 1032 if (!vvs) 1033 return true; 1034 1035 /* buf_alloc and fwd_cnt is always included in the hdr */ 1036 spin_lock_bh(&vvs->tx_lock); 1037 vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); 1038 vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); 1039 space_available = virtio_transport_has_space(vsk); 1040 spin_unlock_bh(&vvs->tx_lock); 1041 return space_available; 1042} 1043 1044/* Handle server socket */ 1045static int 1046virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt, 1047 struct virtio_transport *t) 1048{ 1049 struct vsock_sock *vsk = vsock_sk(sk); 1050 struct vsock_sock *vchild; 1051 struct sock *child; 1052 int ret; 1053 1054 if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) { 1055 virtio_transport_reset_no_sock(t, pkt); 1056 return -EINVAL; 1057 } 1058 1059 if (sk_acceptq_is_full(sk)) { 1060 virtio_transport_reset_no_sock(t, pkt); 1061 return -ENOMEM; 1062 } 1063 1064 child = vsock_create_connected(sk); 1065 if (!child) { 1066 virtio_transport_reset_no_sock(t, pkt); 1067 return -ENOMEM; 1068 } 1069 1070 sk_acceptq_added(sk); 1071 1072 lock_sock_nested(child, SINGLE_DEPTH_NESTING); 1073 1074 child->sk_state = TCP_ESTABLISHED; 1075 1076 vchild = vsock_sk(child); 1077 vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid), 1078 le32_to_cpu(pkt->hdr.dst_port)); 1079 vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid), 1080 le32_to_cpu(pkt->hdr.src_port)); 1081 1082 ret = vsock_assign_transport(vchild, vsk); 1083 /* Transport assigned (looking at remote_addr) must be the same 1084 * where we received the request. 1085 */ 1086 if (ret || vchild->transport != &t->transport) { 1087 release_sock(child); 1088 virtio_transport_reset_no_sock(t, pkt); 1089 sock_put(child); 1090 return ret; 1091 } 1092 1093 if (virtio_transport_space_update(child, pkt)) 1094 child->sk_write_space(child); 1095 1096 vsock_insert_connected(vchild); 1097 vsock_enqueue_accept(sk, child); 1098 virtio_transport_send_response(vchild, pkt); 1099 1100 release_sock(child); 1101 1102 sk->sk_data_ready(sk); 1103 return 0; 1104} 1105 1106/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex 1107 * lock. 1108 */ 1109void virtio_transport_recv_pkt(struct virtio_transport *t, 1110 struct virtio_vsock_pkt *pkt) 1111{ 1112 struct sockaddr_vm src, dst; 1113 struct vsock_sock *vsk; 1114 struct sock *sk; 1115 bool space_available; 1116 1117 vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid), 1118 le32_to_cpu(pkt->hdr.src_port)); 1119 vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid), 1120 le32_to_cpu(pkt->hdr.dst_port)); 1121 1122 trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port, 1123 dst.svm_cid, dst.svm_port, 1124 le32_to_cpu(pkt->hdr.len), 1125 le16_to_cpu(pkt->hdr.type), 1126 le16_to_cpu(pkt->hdr.op), 1127 le32_to_cpu(pkt->hdr.flags), 1128 le32_to_cpu(pkt->hdr.buf_alloc), 1129 le32_to_cpu(pkt->hdr.fwd_cnt)); 1130 1131 if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) { 1132 (void)virtio_transport_reset_no_sock(t, pkt); 1133 goto free_pkt; 1134 } 1135 1136 /* The socket must be in connected or bound table 1137 * otherwise send reset back 1138 */ 1139 sk = vsock_find_connected_socket(&src, &dst); 1140 if (!sk) { 1141 sk = vsock_find_bound_socket(&dst); 1142 if (!sk) { 1143 (void)virtio_transport_reset_no_sock(t, pkt); 1144 goto free_pkt; 1145 } 1146 } 1147 1148 vsk = vsock_sk(sk); 1149 1150 lock_sock(sk); 1151 1152 /* Check if sk has been closed before lock_sock */ 1153 if (sock_flag(sk, SOCK_DONE)) { 1154 (void)virtio_transport_reset_no_sock(t, pkt); 1155 release_sock(sk); 1156 sock_put(sk); 1157 goto free_pkt; 1158 } 1159 1160 space_available = virtio_transport_space_update(sk, pkt); 1161 1162 /* Update CID in case it has changed after a transport reset event */ 1163 if (vsk->local_addr.svm_cid != VMADDR_CID_ANY) 1164 vsk->local_addr.svm_cid = dst.svm_cid; 1165 1166 if (space_available) 1167 sk->sk_write_space(sk); 1168 1169 switch (sk->sk_state) { 1170 case TCP_LISTEN: 1171 virtio_transport_recv_listen(sk, pkt, t); 1172 virtio_transport_free_pkt(pkt); 1173 break; 1174 case TCP_SYN_SENT: 1175 virtio_transport_recv_connecting(sk, pkt); 1176 virtio_transport_free_pkt(pkt); 1177 break; 1178 case TCP_ESTABLISHED: 1179 virtio_transport_recv_connected(sk, pkt); 1180 break; 1181 case TCP_CLOSING: 1182 virtio_transport_recv_disconnecting(sk, pkt); 1183 virtio_transport_free_pkt(pkt); 1184 break; 1185 default: 1186 (void)virtio_transport_reset_no_sock(t, pkt); 1187 virtio_transport_free_pkt(pkt); 1188 break; 1189 } 1190 1191 release_sock(sk); 1192 1193 /* Release refcnt obtained when we fetched this socket out of the 1194 * bound or connected list. 1195 */ 1196 sock_put(sk); 1197 return; 1198 1199free_pkt: 1200 virtio_transport_free_pkt(pkt); 1201} 1202EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); 1203 1204void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) 1205{ 1206 kvfree(pkt->buf); 1207 kfree(pkt); 1208} 1209EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); 1210 1211MODULE_LICENSE("GPL v2"); 1212MODULE_AUTHOR("Asias He"); 1213MODULE_DESCRIPTION("common code for virtio vsock"); 1214