1/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to 5 * deal in the Software without restriction, including without limitation the 6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 * sell copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 * IN THE SOFTWARE. 20 */ 21 22#include "uv.h" 23#include "internal.h" 24 25#include <stdlib.h> 26#include <unistd.h> 27#include <assert.h> 28#include <errno.h> 29 30#include <sys/types.h> 31#include <sys/socket.h> 32 33/* ifaddrs is not implemented on AIX and IBM i PASE */ 34#if !defined(_AIX) 35#include <ifaddrs.h> 36#endif 37 38static int maybe_bind_socket(int fd) { 39 union uv__sockaddr s; 40 socklen_t slen; 41 42 slen = sizeof(s); 43 memset(&s, 0, sizeof(s)); 44 45 if (getsockname(fd, &s.addr, &slen)) 46 return UV__ERR(errno); 47 48 if (s.addr.sa_family == AF_INET) 49 if (s.in.sin_port != 0) 50 return 0; /* Already bound to a port. */ 51 52 if (s.addr.sa_family == AF_INET6) 53 if (s.in6.sin6_port != 0) 54 return 0; /* Already bound to a port. */ 55 56 /* Bind to an arbitrary port. */ 57 if (bind(fd, &s.addr, slen)) 58 return UV__ERR(errno); 59 60 return 0; 61} 62 63 64static int new_socket(uv_tcp_t* handle, int domain, unsigned int flags) { 65 int sockfd; 66 int err; 67 68 sockfd = uv__socket(domain, SOCK_STREAM, 0); 69 if (sockfd < 0) 70 return sockfd; 71 72 err = uv__stream_open((uv_stream_t*) handle, sockfd, flags); 73 if (err) { 74 uv__close(sockfd); 75 return err; 76 } 77 78 if (flags & UV_HANDLE_BOUND) 79 return maybe_bind_socket(sockfd); 80 81 return 0; 82} 83 84 85static int maybe_new_socket(uv_tcp_t* handle, int domain, unsigned int flags) { 86 int sockfd; 87 int err; 88 89 if (domain == AF_UNSPEC) 90 goto out; 91 92 sockfd = uv__stream_fd(handle); 93 if (sockfd == -1) 94 return new_socket(handle, domain, flags); 95 96 if (!(flags & UV_HANDLE_BOUND)) 97 goto out; 98 99 if (handle->flags & UV_HANDLE_BOUND) 100 goto out; /* Already bound to a port. */ 101 102 err = maybe_bind_socket(sockfd); 103 if (err) 104 return err; 105 106out: 107 108 handle->flags |= flags; 109 return 0; 110} 111 112 113int uv_tcp_init_ex(uv_loop_t* loop, uv_tcp_t* tcp, unsigned int flags) { 114 int domain; 115 int err; 116 117 /* Use the lower 8 bits for the domain */ 118 domain = flags & 0xFF; 119 if (domain != AF_INET && domain != AF_INET6 && domain != AF_UNSPEC) 120 return UV_EINVAL; 121 122 if (flags & ~0xFF) 123 return UV_EINVAL; 124 125 uv__stream_init(loop, (uv_stream_t*)tcp, UV_TCP); 126 127 /* If anything fails beyond this point we need to remove the handle from 128 * the handle queue, since it was added by uv__handle_init in uv_stream_init. 129 */ 130 131 if (domain != AF_UNSPEC) { 132 err = new_socket(tcp, domain, 0); 133 if (err) { 134 uv__queue_remove(&tcp->handle_queue); 135 if (tcp->io_watcher.fd != -1) 136 uv__close(tcp->io_watcher.fd); 137 tcp->io_watcher.fd = -1; 138 return err; 139 } 140 } 141 142 return 0; 143} 144 145 146int uv_tcp_init(uv_loop_t* loop, uv_tcp_t* tcp) { 147 return uv_tcp_init_ex(loop, tcp, AF_UNSPEC); 148} 149 150 151int uv__tcp_bind(uv_tcp_t* tcp, 152 const struct sockaddr* addr, 153 unsigned int addrlen, 154 unsigned int flags) { 155 int err; 156 int on; 157 158 /* Cannot set IPv6-only mode on non-IPv6 socket. */ 159 if ((flags & UV_TCP_IPV6ONLY) && addr->sa_family != AF_INET6) 160 return UV_EINVAL; 161 162 err = maybe_new_socket(tcp, addr->sa_family, 0); 163 if (err) 164 return err; 165 166 on = 1; 167 if (setsockopt(tcp->io_watcher.fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) 168 return UV__ERR(errno); 169 170#ifndef __OpenBSD__ 171#ifdef IPV6_V6ONLY 172 if (addr->sa_family == AF_INET6) { 173 on = (flags & UV_TCP_IPV6ONLY) != 0; 174 if (setsockopt(tcp->io_watcher.fd, 175 IPPROTO_IPV6, 176 IPV6_V6ONLY, 177 &on, 178 sizeof on) == -1) { 179#if defined(__MVS__) 180 if (errno == EOPNOTSUPP) 181 return UV_EINVAL; 182#endif 183 return UV__ERR(errno); 184 } 185 } 186#endif 187#endif 188 189 errno = 0; 190 err = bind(tcp->io_watcher.fd, addr, addrlen); 191 if (err == -1 && errno != EADDRINUSE) { 192 if (errno == EAFNOSUPPORT) 193 /* OSX, other BSDs and SunoS fail with EAFNOSUPPORT when binding a 194 * socket created with AF_INET to an AF_INET6 address or vice versa. */ 195 return UV_EINVAL; 196 return UV__ERR(errno); 197 } 198 tcp->delayed_error = (err == -1) ? UV__ERR(errno) : 0; 199 200 tcp->flags |= UV_HANDLE_BOUND; 201 if (addr->sa_family == AF_INET6) 202 tcp->flags |= UV_HANDLE_IPV6; 203 204 return 0; 205} 206 207 208static int uv__is_ipv6_link_local(const struct sockaddr* addr) { 209 const struct sockaddr_in6* a6; 210 uint8_t b[2]; 211 212 if (addr->sa_family != AF_INET6) 213 return 0; 214 215 a6 = (const struct sockaddr_in6*) addr; 216 memcpy(b, &a6->sin6_addr, sizeof(b)); 217 218 return b[0] == 0xFE && b[1] == 0x80; 219} 220 221 222static int uv__ipv6_link_local_scope_id(void) { 223 struct sockaddr_in6* a6; 224 int rv; 225#if defined(_AIX) 226 /* AIX & IBM i do not have ifaddrs 227 * so fallback to use uv_interface_addresses */ 228 uv_interface_address_t* interfaces; 229 uv_interface_address_t* ifa; 230 int count, i; 231 232 if (uv_interface_addresses(&interfaces, &count)) 233 return 0; 234 235 rv = 0; 236 237 for (ifa = interfaces; ifa != &interfaces[count]; ifa++) { 238 if (uv__is_ipv6_link_local((struct sockaddr*) &ifa->address)) { 239 rv = ifa->address.address6.sin6_scope_id; 240 break; 241 } 242 } 243 244 uv_free_interface_addresses(interfaces, count); 245 246#else 247 struct ifaddrs* ifa; 248 struct ifaddrs* p; 249 250 if (getifaddrs(&ifa)) 251 return 0; 252 253 for (p = ifa; p != NULL; p = p->ifa_next) 254 if (p->ifa_addr != NULL) 255 if (uv__is_ipv6_link_local(p->ifa_addr)) 256 break; 257 258 rv = 0; 259 if (p != NULL) { 260 a6 = (struct sockaddr_in6*) p->ifa_addr; 261 rv = a6->sin6_scope_id; 262 } 263 264 freeifaddrs(ifa); 265#endif /* defined(_AIX) */ 266 267 return rv; 268} 269 270 271int uv__tcp_connect(uv_connect_t* req, 272 uv_tcp_t* handle, 273 const struct sockaddr* addr, 274 unsigned int addrlen, 275 uv_connect_cb cb) { 276 struct sockaddr_in6 tmp6; 277 int err; 278 int r; 279 280 assert(handle->type == UV_TCP); 281 282 if (handle->connect_req != NULL) 283 return UV_EALREADY; /* FIXME(bnoordhuis) UV_EINVAL or maybe UV_EBUSY. */ 284 285 if (handle->delayed_error != 0) 286 goto out; 287 288 err = maybe_new_socket(handle, 289 addr->sa_family, 290 UV_HANDLE_READABLE | UV_HANDLE_WRITABLE); 291 if (err) 292 return err; 293 294 if (uv__is_ipv6_link_local(addr)) { 295 memcpy(&tmp6, addr, sizeof(tmp6)); 296 if (tmp6.sin6_scope_id == 0) { 297 tmp6.sin6_scope_id = uv__ipv6_link_local_scope_id(); 298 addr = (void*) &tmp6; 299 } 300 } 301 302 do { 303 errno = 0; 304 r = connect(uv__stream_fd(handle), addr, addrlen); 305 } while (r == -1 && errno == EINTR); 306 307 /* We not only check the return value, but also check the errno != 0. 308 * Because in rare cases connect() will return -1 but the errno 309 * is 0 (for example, on Android 4.3, OnePlus phone A0001_12_150227) 310 * and actually the tcp three-way handshake is completed. 311 */ 312 if (r == -1 && errno != 0) { 313 if (errno == EINPROGRESS) 314 ; /* not an error */ 315 else if (errno == ECONNREFUSED 316#if defined(__OpenBSD__) 317 || errno == EINVAL 318#endif 319 ) 320 /* If we get ECONNREFUSED (Solaris) or EINVAL (OpenBSD) wait until the 321 * next tick to report the error. Solaris and OpenBSD wants to report 322 * immediately -- other unixes want to wait. 323 */ 324 handle->delayed_error = UV__ERR(ECONNREFUSED); 325 else 326 return UV__ERR(errno); 327 } 328 329out: 330 331 uv__req_init(handle->loop, req, UV_CONNECT); 332 req->cb = cb; 333 req->handle = (uv_stream_t*) handle; 334 uv__queue_init(&req->queue); 335 handle->connect_req = req; 336 337 uv__io_start(handle->loop, &handle->io_watcher, POLLOUT); 338 339 if (handle->delayed_error) 340 uv__io_feed(handle->loop, &handle->io_watcher); 341 342 return 0; 343} 344 345 346int uv_tcp_open(uv_tcp_t* handle, uv_os_sock_t sock) { 347 int err; 348 349 if (uv__fd_exists(handle->loop, sock)) 350 return UV_EEXIST; 351 352 err = uv__nonblock(sock, 1); 353 if (err) 354 return err; 355 356 return uv__stream_open((uv_stream_t*)handle, 357 sock, 358 UV_HANDLE_READABLE | UV_HANDLE_WRITABLE); 359} 360 361 362int uv_tcp_getsockname(const uv_tcp_t* handle, 363 struct sockaddr* name, 364 int* namelen) { 365 366 if (handle->delayed_error) 367 return handle->delayed_error; 368 369 return uv__getsockpeername((const uv_handle_t*) handle, 370 getsockname, 371 name, 372 namelen); 373} 374 375 376int uv_tcp_getpeername(const uv_tcp_t* handle, 377 struct sockaddr* name, 378 int* namelen) { 379 380 if (handle->delayed_error) 381 return handle->delayed_error; 382 383 return uv__getsockpeername((const uv_handle_t*) handle, 384 getpeername, 385 name, 386 namelen); 387} 388 389 390int uv_tcp_close_reset(uv_tcp_t* handle, uv_close_cb close_cb) { 391 int fd; 392 struct linger l = { 1, 0 }; 393 394 /* Disallow setting SO_LINGER to zero due to some platform inconsistencies */ 395 if (uv__is_stream_shutting(handle)) 396 return UV_EINVAL; 397 398 fd = uv__stream_fd(handle); 399 if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l))) { 400 if (errno == EINVAL) { 401 /* Open Group Specifications Issue 7, 2018 edition states that 402 * EINVAL may mean the socket has been shut down already. 403 * Behavior observed on Solaris, illumos and macOS. */ 404 errno = 0; 405 } else { 406 return UV__ERR(errno); 407 } 408 } 409 410 uv_close((uv_handle_t*) handle, close_cb); 411 return 0; 412} 413 414 415int uv__tcp_listen(uv_tcp_t* tcp, int backlog, uv_connection_cb cb) { 416 unsigned int flags; 417 int err; 418 419 if (tcp->delayed_error) 420 return tcp->delayed_error; 421 422 flags = 0; 423#if defined(__MVS__) 424 /* on zOS the listen call does not bind automatically 425 if the socket is unbound. Hence the manual binding to 426 an arbitrary port is required to be done manually 427 */ 428 flags |= UV_HANDLE_BOUND; 429#endif 430 err = maybe_new_socket(tcp, AF_INET, flags); 431 if (err) 432 return err; 433 434 if (listen(tcp->io_watcher.fd, backlog)) 435 return UV__ERR(errno); 436 437 tcp->connection_cb = cb; 438 tcp->flags |= UV_HANDLE_BOUND; 439 440 /* Start listening for connections. */ 441 tcp->io_watcher.cb = uv__server_io; 442 uv__io_start(tcp->loop, &tcp->io_watcher, POLLIN); 443 444 return 0; 445} 446 447 448int uv__tcp_nodelay(int fd, int on) { 449 if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on))) 450 return UV__ERR(errno); 451 return 0; 452} 453 454 455int uv__tcp_keepalive(int fd, int on, unsigned int delay) { 456 int idle; 457 int intvl; 458 int cnt; 459 460 (void) &idle; 461 (void) &intvl; 462 (void) &cnt; 463 464 if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on))) 465 return UV__ERR(errno); 466 467 if (!on) 468 return 0; 469 470 if (delay == 0) 471 return -1; 472 473#ifdef __sun 474 /* The implementation of TCP keep-alive on Solaris/SmartOS is a bit unusual 475 * compared to other Unix-like systems. 476 * Thus, we need to specialize it on Solaris. 477 * 478 * There are two keep-alive mechanisms on Solaris: 479 * - By default, the first keep-alive probe is sent out after a TCP connection is idle for two hours. 480 * If the peer does not respond to the probe within eight minutes, the TCP connection is aborted. 481 * You can alter the interval for sending out the first probe using the socket option TCP_KEEPALIVE_THRESHOLD 482 * in milliseconds or TCP_KEEPIDLE in seconds. 483 * The system default is controlled by the TCP ndd parameter tcp_keepalive_interval. The minimum value is ten seconds. 484 * The maximum is ten days, while the default is two hours. If you receive no response to the probe, 485 * you can use the TCP_KEEPALIVE_ABORT_THRESHOLD socket option to change the time threshold for aborting a TCP connection. 486 * The option value is an unsigned integer in milliseconds. The value zero indicates that TCP should never time out and 487 * abort the connection when probing. The system default is controlled by the TCP ndd parameter tcp_keepalive_abort_interval. 488 * The default is eight minutes. 489 * 490 * - The second implementation is activated if socket option TCP_KEEPINTVL and/or TCP_KEEPCNT are set. 491 * The time between each consequent probes is set by TCP_KEEPINTVL in seconds. 492 * The minimum value is ten seconds. The maximum is ten days, while the default is two hours. 493 * The TCP connection will be aborted after certain amount of probes, which is set by TCP_KEEPCNT, without receiving response. 494 */ 495 496 idle = delay; 497 /* Kernel expects at least 10 seconds. */ 498 if (idle < 10) 499 idle = 10; 500 /* Kernel expects at most 10 days. */ 501 if (idle > 10*24*60*60) 502 idle = 10*24*60*60; 503 504 /* `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` were not available on Solaris 505 * until version 11.4, but let's take a chance here. */ 506#if defined(TCP_KEEPIDLE) && defined(TCP_KEEPINTVL) && defined(TCP_KEEPCNT) 507 if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) 508 return UV__ERR(errno); 509 510 intvl = idle/3; 511 if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) 512 return UV__ERR(errno); 513 514 cnt = 3; 515 if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) 516 return UV__ERR(errno); 517#else 518 /* Fall back to the first implementation of tcp-alive mechanism for older Solaris, 519 * simulate the tcp-alive mechanism on other platforms via `TCP_KEEPALIVE_THRESHOLD` + `TCP_KEEPALIVE_ABORT_THRESHOLD`. 520 */ 521 idle *= 1000; /* kernel expects milliseconds */ 522 if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_THRESHOLD, &idle, sizeof(idle))) 523 return UV__ERR(errno); 524 525 /* Note that the consequent probes will not be sent at equal intervals on Solaris, 526 * but will be sent using the exponential backoff algorithm. */ 527 intvl = idle/3; 528 cnt = 3; 529 int time_to_abort = intvl * cnt; 530 if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_ABORT_THRESHOLD, &time_to_abort, sizeof(time_to_abort))) 531 return UV__ERR(errno); 532#endif 533 534#else /* !defined(__sun) */ 535 536#ifdef TCP_KEEPIDLE 537 if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &delay, sizeof(delay))) 538 return UV__ERR(errno); 539#elif defined(TCP_KEEPALIVE) 540 /* Darwin/macOS uses TCP_KEEPALIVE in place of TCP_KEEPIDLE. */ 541 if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &delay, sizeof(delay))) 542 return UV__ERR(errno); 543#endif 544 545#ifdef TCP_KEEPINTVL 546 intvl = 1; /* 1 second; same as default on Win32 */ 547 if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) 548 return UV__ERR(errno); 549#endif 550 551#ifdef TCP_KEEPCNT 552 cnt = 10; /* 10 retries; same as hardcoded on Win32 */ 553 if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) 554 return UV__ERR(errno); 555#endif 556 557#endif /* !defined(__sun) */ 558 return 0; 559} 560 561 562int uv_tcp_nodelay(uv_tcp_t* handle, int on) { 563 int err; 564 565 if (uv__stream_fd(handle) != -1) { 566 err = uv__tcp_nodelay(uv__stream_fd(handle), on); 567 if (err) 568 return err; 569 } 570 571 if (on) 572 handle->flags |= UV_HANDLE_TCP_NODELAY; 573 else 574 handle->flags &= ~UV_HANDLE_TCP_NODELAY; 575 576 return 0; 577} 578 579 580int uv_tcp_keepalive(uv_tcp_t* handle, int on, unsigned int delay) { 581 int err; 582 583 if (uv__stream_fd(handle) != -1) { 584 err =uv__tcp_keepalive(uv__stream_fd(handle), on, delay); 585 if (err) 586 return err; 587 } 588 589 if (on) 590 handle->flags |= UV_HANDLE_TCP_KEEPALIVE; 591 else 592 handle->flags &= ~UV_HANDLE_TCP_KEEPALIVE; 593 594 /* TODO Store delay if uv__stream_fd(handle) == -1 but don't want to enlarge 595 * uv_tcp_t with an int that's almost never used... 596 */ 597 598 return 0; 599} 600 601 602int uv_tcp_simultaneous_accepts(uv_tcp_t* handle, int enable) { 603 return 0; 604} 605 606 607void uv__tcp_close(uv_tcp_t* handle) { 608 uv__stream_close((uv_stream_t*)handle); 609} 610 611 612int uv_socketpair(int type, int protocol, uv_os_sock_t fds[2], int flags0, int flags1) { 613 uv_os_sock_t temp[2]; 614 int err; 615#if defined(__FreeBSD__) || defined(__linux__) 616 int flags; 617 618 flags = type | SOCK_CLOEXEC; 619 if ((flags0 & UV_NONBLOCK_PIPE) && (flags1 & UV_NONBLOCK_PIPE)) 620 flags |= SOCK_NONBLOCK; 621 622 if (socketpair(AF_UNIX, flags, protocol, temp)) 623 return UV__ERR(errno); 624 625 if (flags & UV_FS_O_NONBLOCK) { 626 fds[0] = temp[0]; 627 fds[1] = temp[1]; 628 return 0; 629 } 630#else 631 if (socketpair(AF_UNIX, type, protocol, temp)) 632 return UV__ERR(errno); 633 634 if ((err = uv__cloexec(temp[0], 1))) 635 goto fail; 636 if ((err = uv__cloexec(temp[1], 1))) 637 goto fail; 638#endif 639 640 if (flags0 & UV_NONBLOCK_PIPE) 641 if ((err = uv__nonblock(temp[0], 1))) 642 goto fail; 643 if (flags1 & UV_NONBLOCK_PIPE) 644 if ((err = uv__nonblock(temp[1], 1))) 645 goto fail; 646 647 fds[0] = temp[0]; 648 fds[1] = temp[1]; 649 return 0; 650 651fail: 652 uv__close(temp[0]); 653 uv__close(temp[1]); 654 return err; 655} 656