xref: /third_party/libuv/src/unix/tcp.c (revision e66f31c5)
1/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 * IN THE SOFTWARE.
20 */
21
22#include "uv.h"
23#include "internal.h"
24
25#include <stdlib.h>
26#include <unistd.h>
27#include <assert.h>
28#include <errno.h>
29
30#include <sys/types.h>
31#include <sys/socket.h>
32
33/* ifaddrs is not implemented on AIX and IBM i PASE */
34#if !defined(_AIX)
35#include <ifaddrs.h>
36#endif
37
38static int maybe_bind_socket(int fd) {
39  union uv__sockaddr s;
40  socklen_t slen;
41
42  slen = sizeof(s);
43  memset(&s, 0, sizeof(s));
44
45  if (getsockname(fd, &s.addr, &slen))
46    return UV__ERR(errno);
47
48  if (s.addr.sa_family == AF_INET)
49    if (s.in.sin_port != 0)
50      return 0;  /* Already bound to a port. */
51
52  if (s.addr.sa_family == AF_INET6)
53    if (s.in6.sin6_port != 0)
54      return 0;  /* Already bound to a port. */
55
56  /* Bind to an arbitrary port. */
57  if (bind(fd, &s.addr, slen))
58    return UV__ERR(errno);
59
60  return 0;
61}
62
63
64static int new_socket(uv_tcp_t* handle, int domain, unsigned int flags) {
65  int sockfd;
66  int err;
67
68  sockfd = uv__socket(domain, SOCK_STREAM, 0);
69  if (sockfd < 0)
70    return sockfd;
71
72  err = uv__stream_open((uv_stream_t*) handle, sockfd, flags);
73  if (err) {
74    uv__close(sockfd);
75    return err;
76  }
77
78  if (flags & UV_HANDLE_BOUND)
79    return maybe_bind_socket(sockfd);
80
81  return 0;
82}
83
84
85static int maybe_new_socket(uv_tcp_t* handle, int domain, unsigned int flags) {
86  int sockfd;
87  int err;
88
89  if (domain == AF_UNSPEC)
90    goto out;
91
92  sockfd = uv__stream_fd(handle);
93  if (sockfd == -1)
94    return new_socket(handle, domain, flags);
95
96  if (!(flags & UV_HANDLE_BOUND))
97    goto out;
98
99  if (handle->flags & UV_HANDLE_BOUND)
100    goto out;  /* Already bound to a port. */
101
102  err = maybe_bind_socket(sockfd);
103  if (err)
104    return err;
105
106out:
107
108  handle->flags |= flags;
109  return 0;
110}
111
112
113int uv_tcp_init_ex(uv_loop_t* loop, uv_tcp_t* tcp, unsigned int flags) {
114  int domain;
115  int err;
116
117  /* Use the lower 8 bits for the domain */
118  domain = flags & 0xFF;
119  if (domain != AF_INET && domain != AF_INET6 && domain != AF_UNSPEC)
120    return UV_EINVAL;
121
122  if (flags & ~0xFF)
123    return UV_EINVAL;
124
125  uv__stream_init(loop, (uv_stream_t*)tcp, UV_TCP);
126
127  /* If anything fails beyond this point we need to remove the handle from
128   * the handle queue, since it was added by uv__handle_init in uv_stream_init.
129   */
130
131  if (domain != AF_UNSPEC) {
132    err = new_socket(tcp, domain, 0);
133    if (err) {
134      uv__queue_remove(&tcp->handle_queue);
135      if (tcp->io_watcher.fd != -1)
136        uv__close(tcp->io_watcher.fd);
137      tcp->io_watcher.fd = -1;
138      return err;
139    }
140  }
141
142  return 0;
143}
144
145
146int uv_tcp_init(uv_loop_t* loop, uv_tcp_t* tcp) {
147  return uv_tcp_init_ex(loop, tcp, AF_UNSPEC);
148}
149
150
151int uv__tcp_bind(uv_tcp_t* tcp,
152                 const struct sockaddr* addr,
153                 unsigned int addrlen,
154                 unsigned int flags) {
155  int err;
156  int on;
157
158  /* Cannot set IPv6-only mode on non-IPv6 socket. */
159  if ((flags & UV_TCP_IPV6ONLY) && addr->sa_family != AF_INET6)
160    return UV_EINVAL;
161
162  err = maybe_new_socket(tcp, addr->sa_family, 0);
163  if (err)
164    return err;
165
166  on = 1;
167  if (setsockopt(tcp->io_watcher.fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)))
168    return UV__ERR(errno);
169
170#ifndef __OpenBSD__
171#ifdef IPV6_V6ONLY
172  if (addr->sa_family == AF_INET6) {
173    on = (flags & UV_TCP_IPV6ONLY) != 0;
174    if (setsockopt(tcp->io_watcher.fd,
175                   IPPROTO_IPV6,
176                   IPV6_V6ONLY,
177                   &on,
178                   sizeof on) == -1) {
179#if defined(__MVS__)
180      if (errno == EOPNOTSUPP)
181        return UV_EINVAL;
182#endif
183      return UV__ERR(errno);
184    }
185  }
186#endif
187#endif
188
189  errno = 0;
190  err = bind(tcp->io_watcher.fd, addr, addrlen);
191  if (err == -1 && errno != EADDRINUSE) {
192    if (errno == EAFNOSUPPORT)
193      /* OSX, other BSDs and SunoS fail with EAFNOSUPPORT when binding a
194       * socket created with AF_INET to an AF_INET6 address or vice versa. */
195      return UV_EINVAL;
196    return UV__ERR(errno);
197  }
198  tcp->delayed_error = (err == -1) ? UV__ERR(errno) : 0;
199
200  tcp->flags |= UV_HANDLE_BOUND;
201  if (addr->sa_family == AF_INET6)
202    tcp->flags |= UV_HANDLE_IPV6;
203
204  return 0;
205}
206
207
/* Return 1 when `addr` is an AF_INET6 address whose first two address bytes
 * are exactly 0xFE 0x80, and 0 in every other case.
 */
static int uv__is_ipv6_link_local(const struct sockaddr* addr) {
  const struct sockaddr_in6* sin6;
  uint8_t prefix[2];

  if (addr->sa_family == AF_INET6) {
    sin6 = (const struct sockaddr_in6*) addr;
    /* Copy the leading bytes out rather than reading through the struct. */
    memcpy(prefix, &sin6->sin6_addr, sizeof(prefix));
    if (prefix[0] == 0xFE && prefix[1] == 0x80)
      return 1;
  }

  return 0;
}
220
221
/* Return the sin6_scope_id of the first interface address for which
 * uv__is_ipv6_link_local() is true, or 0 when there is none or the
 * interface list cannot be obtained.
 */
static int uv__ipv6_link_local_scope_id(void) {
  int scope_id;
#if defined(_AIX)
  /* AIX & IBM i do not have ifaddrs
   * so fallback to use uv_interface_addresses */
  uv_interface_address_t* list;
  uv_interface_address_t* end;
  uv_interface_address_t* cur;
  int n;

  if (uv_interface_addresses(&list, &n))
    return 0;

  scope_id = 0;
  end = &list[n];

  for (cur = list; cur != end; cur++) {
    if (!uv__is_ipv6_link_local((struct sockaddr*) &cur->address))
      continue;
    scope_id = cur->address.address6.sin6_scope_id;
    break;
  }

  uv_free_interface_addresses(list, n);

#else
  struct ifaddrs* head;
  struct ifaddrs* cur;
  struct sockaddr_in6* sin6;

  if (getifaddrs(&head))
    return 0;

  scope_id = 0;

  for (cur = head; cur != NULL; cur = cur->ifa_next) {
    /* Entries without an address are skipped. */
    if (cur->ifa_addr == NULL)
      continue;
    if (!uv__is_ipv6_link_local(cur->ifa_addr))
      continue;

    sin6 = (struct sockaddr_in6*) cur->ifa_addr;
    scope_id = sin6->sin6_scope_id;
    break;
  }

  freeifaddrs(head);
#endif /* defined(_AIX) */

  return scope_id;
}
269
270
271int uv__tcp_connect(uv_connect_t* req,
272                    uv_tcp_t* handle,
273                    const struct sockaddr* addr,
274                    unsigned int addrlen,
275                    uv_connect_cb cb) {
276  struct sockaddr_in6 tmp6;
277  int err;
278  int r;
279
280  assert(handle->type == UV_TCP);
281
282  if (handle->connect_req != NULL)
283    return UV_EALREADY;  /* FIXME(bnoordhuis) UV_EINVAL or maybe UV_EBUSY. */
284
285  if (handle->delayed_error != 0)
286    goto out;
287
288  err = maybe_new_socket(handle,
289                         addr->sa_family,
290                         UV_HANDLE_READABLE | UV_HANDLE_WRITABLE);
291  if (err)
292    return err;
293
294  if (uv__is_ipv6_link_local(addr)) {
295    memcpy(&tmp6, addr, sizeof(tmp6));
296    if (tmp6.sin6_scope_id == 0) {
297      tmp6.sin6_scope_id = uv__ipv6_link_local_scope_id();
298      addr = (void*) &tmp6;
299    }
300  }
301
302  do {
303    errno = 0;
304    r = connect(uv__stream_fd(handle), addr, addrlen);
305  } while (r == -1 && errno == EINTR);
306
307  /* We not only check the return value, but also check the errno != 0.
308   * Because in rare cases connect() will return -1 but the errno
309   * is 0 (for example, on Android 4.3, OnePlus phone A0001_12_150227)
310   * and actually the tcp three-way handshake is completed.
311   */
312  if (r == -1 && errno != 0) {
313    if (errno == EINPROGRESS)
314      ; /* not an error */
315    else if (errno == ECONNREFUSED
316#if defined(__OpenBSD__)
317      || errno == EINVAL
318#endif
319      )
320    /* If we get ECONNREFUSED (Solaris) or EINVAL (OpenBSD) wait until the
321     * next tick to report the error. Solaris and OpenBSD wants to report
322     * immediately -- other unixes want to wait.
323     */
324      handle->delayed_error = UV__ERR(ECONNREFUSED);
325    else
326      return UV__ERR(errno);
327  }
328
329out:
330
331  uv__req_init(handle->loop, req, UV_CONNECT);
332  req->cb = cb;
333  req->handle = (uv_stream_t*) handle;
334  uv__queue_init(&req->queue);
335  handle->connect_req = req;
336
337  uv__io_start(handle->loop, &handle->io_watcher, POLLOUT);
338
339  if (handle->delayed_error)
340    uv__io_feed(handle->loop, &handle->io_watcher);
341
342  return 0;
343}
344
345
346int uv_tcp_open(uv_tcp_t* handle, uv_os_sock_t sock) {
347  int err;
348
349  if (uv__fd_exists(handle->loop, sock))
350    return UV_EEXIST;
351
352  err = uv__nonblock(sock, 1);
353  if (err)
354    return err;
355
356  return uv__stream_open((uv_stream_t*)handle,
357                         sock,
358                         UV_HANDLE_READABLE | UV_HANDLE_WRITABLE);
359}
360
361
362int uv_tcp_getsockname(const uv_tcp_t* handle,
363                       struct sockaddr* name,
364                       int* namelen) {
365
366  if (handle->delayed_error)
367    return handle->delayed_error;
368
369  return uv__getsockpeername((const uv_handle_t*) handle,
370                             getsockname,
371                             name,
372                             namelen);
373}
374
375
376int uv_tcp_getpeername(const uv_tcp_t* handle,
377                       struct sockaddr* name,
378                       int* namelen) {
379
380  if (handle->delayed_error)
381    return handle->delayed_error;
382
383  return uv__getsockpeername((const uv_handle_t*) handle,
384                             getpeername,
385                             name,
386                             namelen);
387}
388
389
390int uv_tcp_close_reset(uv_tcp_t* handle, uv_close_cb close_cb) {
391  int fd;
392  struct linger l = { 1, 0 };
393
394  /* Disallow setting SO_LINGER to zero due to some platform inconsistencies */
395  if (uv__is_stream_shutting(handle))
396    return UV_EINVAL;
397
398  fd = uv__stream_fd(handle);
399  if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l))) {
400    if (errno == EINVAL) {
401      /* Open Group Specifications Issue 7, 2018 edition states that
402       * EINVAL may mean the socket has been shut down already.
403       * Behavior observed on Solaris, illumos and macOS. */
404      errno = 0;
405    } else {
406      return UV__ERR(errno);
407    }
408  }
409
410  uv_close((uv_handle_t*) handle, close_cb);
411  return 0;
412}
413
414
415int uv__tcp_listen(uv_tcp_t* tcp, int backlog, uv_connection_cb cb) {
416  unsigned int flags;
417  int err;
418
419  if (tcp->delayed_error)
420    return tcp->delayed_error;
421
422  flags = 0;
423#if defined(__MVS__)
424  /* on zOS the listen call does not bind automatically
425     if the socket is unbound. Hence the manual binding to
426     an arbitrary port is required to be done manually
427  */
428  flags |= UV_HANDLE_BOUND;
429#endif
430  err = maybe_new_socket(tcp, AF_INET, flags);
431  if (err)
432    return err;
433
434  if (listen(tcp->io_watcher.fd, backlog))
435    return UV__ERR(errno);
436
437  tcp->connection_cb = cb;
438  tcp->flags |= UV_HANDLE_BOUND;
439
440  /* Start listening for connections. */
441  tcp->io_watcher.cb = uv__server_io;
442  uv__io_start(tcp->loop, &tcp->io_watcher, POLLIN);
443
444  return 0;
445}
446
447
/* Set the TCP_NODELAY option of `fd` to `on`.
 * Returns 0 on success or a translated errno when setsockopt() fails.
 */
int uv__tcp_nodelay(int fd, int on) {
  int rc;

  rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
  if (rc != 0)
    return UV__ERR(errno);

  return 0;
}
453
454
455int uv__tcp_keepalive(int fd, int on, unsigned int delay) {
456  int idle;
457  int intvl;
458  int cnt;
459
460  (void) &idle;
461  (void) &intvl;
462  (void) &cnt;
463
464  if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)))
465    return UV__ERR(errno);
466
467  if (!on)
468    return 0;
469
470  if (delay == 0)
471    return -1;
472
473#ifdef __sun
474  /* The implementation of TCP keep-alive on Solaris/SmartOS is a bit unusual
475   * compared to other Unix-like systems.
476   * Thus, we need to specialize it on Solaris.
477   *
478   * There are two keep-alive mechanisms on Solaris:
479   * - By default, the first keep-alive probe is sent out after a TCP connection is idle for two hours.
480   * If the peer does not respond to the probe within eight minutes, the TCP connection is aborted.
481   * You can alter the interval for sending out the first probe using the socket option TCP_KEEPALIVE_THRESHOLD
482   * in milliseconds or TCP_KEEPIDLE in seconds.
483   * The system default is controlled by the TCP ndd parameter tcp_keepalive_interval. The minimum value is ten seconds.
484   * The maximum is ten days, while the default is two hours. If you receive no response to the probe,
485   * you can use the TCP_KEEPALIVE_ABORT_THRESHOLD socket option to change the time threshold for aborting a TCP connection.
486   * The option value is an unsigned integer in milliseconds. The value zero indicates that TCP should never time out and
487   * abort the connection when probing. The system default is controlled by the TCP ndd parameter tcp_keepalive_abort_interval.
488   * The default is eight minutes.
489   *
490   * - The second implementation is activated if socket option TCP_KEEPINTVL and/or TCP_KEEPCNT are set.
491   * The time between each consequent probes is set by TCP_KEEPINTVL in seconds.
492   * The minimum value is ten seconds. The maximum is ten days, while the default is two hours.
493   * The TCP connection will be aborted after certain amount of probes, which is set by TCP_KEEPCNT, without receiving response.
494   */
495
496  idle = delay;
497  /* Kernel expects at least 10 seconds. */
498  if (idle < 10)
499    idle = 10;
500  /* Kernel expects at most 10 days. */
501  if (idle > 10*24*60*60)
502    idle = 10*24*60*60;
503
504  /* `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` were not available on Solaris
505   * until version 11.4, but let's take a chance here. */
506#if defined(TCP_KEEPIDLE) && defined(TCP_KEEPINTVL) && defined(TCP_KEEPCNT)
507  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle)))
508    return UV__ERR(errno);
509
510  intvl = idle/3;
511  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)))
512    return UV__ERR(errno);
513
514  cnt = 3;
515  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)))
516    return UV__ERR(errno);
517#else
518  /* Fall back to the first implementation of tcp-alive mechanism for older Solaris,
519   * simulate the tcp-alive mechanism on other platforms via `TCP_KEEPALIVE_THRESHOLD` + `TCP_KEEPALIVE_ABORT_THRESHOLD`.
520   */
521  idle *= 1000; /* kernel expects milliseconds */
522  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_THRESHOLD, &idle, sizeof(idle)))
523    return UV__ERR(errno);
524
525  /* Note that the consequent probes will not be sent at equal intervals on Solaris,
526   * but will be sent using the exponential backoff algorithm. */
527  intvl = idle/3;
528  cnt = 3;
529  int time_to_abort = intvl * cnt;
530  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_ABORT_THRESHOLD, &time_to_abort, sizeof(time_to_abort)))
531    return UV__ERR(errno);
532#endif
533
534#else  /* !defined(__sun) */
535
536#ifdef TCP_KEEPIDLE
537  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &delay, sizeof(delay)))
538    return UV__ERR(errno);
539#elif defined(TCP_KEEPALIVE)
540  /* Darwin/macOS uses TCP_KEEPALIVE in place of TCP_KEEPIDLE. */
541  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &delay, sizeof(delay)))
542    return UV__ERR(errno);
543#endif
544
545#ifdef TCP_KEEPINTVL
546  intvl = 1;  /*  1 second; same as default on Win32 */
547  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)))
548    return UV__ERR(errno);
549#endif
550
551#ifdef TCP_KEEPCNT
552  cnt = 10;  /* 10 retries; same as hardcoded on Win32 */
553  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)))
554    return UV__ERR(errno);
555#endif
556
557#endif  /* !defined(__sun) */
558  return 0;
559}
560
561
562int uv_tcp_nodelay(uv_tcp_t* handle, int on) {
563  int err;
564
565  if (uv__stream_fd(handle) != -1) {
566    err = uv__tcp_nodelay(uv__stream_fd(handle), on);
567    if (err)
568      return err;
569  }
570
571  if (on)
572    handle->flags |= UV_HANDLE_TCP_NODELAY;
573  else
574    handle->flags &= ~UV_HANDLE_TCP_NODELAY;
575
576  return 0;
577}
578
579
580int uv_tcp_keepalive(uv_tcp_t* handle, int on, unsigned int delay) {
581  int err;
582
583  if (uv__stream_fd(handle) != -1) {
584    err =uv__tcp_keepalive(uv__stream_fd(handle), on, delay);
585    if (err)
586      return err;
587  }
588
589  if (on)
590    handle->flags |= UV_HANDLE_TCP_KEEPALIVE;
591  else
592    handle->flags &= ~UV_HANDLE_TCP_KEEPALIVE;
593
594  /* TODO Store delay if uv__stream_fd(handle) == -1 but don't want to enlarge
595   *      uv_tcp_t with an int that's almost never used...
596   */
597
598  return 0;
599}
600
601
602int uv_tcp_simultaneous_accepts(uv_tcp_t* handle, int enable) {
603  return 0;
604}
605
606
607void uv__tcp_close(uv_tcp_t* handle) {
608  uv__stream_close((uv_stream_t*)handle);
609}
610
611
612int uv_socketpair(int type, int protocol, uv_os_sock_t fds[2], int flags0, int flags1) {
613  uv_os_sock_t temp[2];
614  int err;
615#if defined(__FreeBSD__) || defined(__linux__)
616  int flags;
617
618  flags = type | SOCK_CLOEXEC;
619  if ((flags0 & UV_NONBLOCK_PIPE) && (flags1 & UV_NONBLOCK_PIPE))
620    flags |= SOCK_NONBLOCK;
621
622  if (socketpair(AF_UNIX, flags, protocol, temp))
623    return UV__ERR(errno);
624
625  if (flags & UV_FS_O_NONBLOCK) {
626    fds[0] = temp[0];
627    fds[1] = temp[1];
628    return 0;
629  }
630#else
631  if (socketpair(AF_UNIX, type, protocol, temp))
632    return UV__ERR(errno);
633
634  if ((err = uv__cloexec(temp[0], 1)))
635    goto fail;
636  if ((err = uv__cloexec(temp[1], 1)))
637    goto fail;
638#endif
639
640  if (flags0 & UV_NONBLOCK_PIPE)
641    if ((err = uv__nonblock(temp[0], 1)))
642        goto fail;
643  if (flags1 & UV_NONBLOCK_PIPE)
644    if ((err = uv__nonblock(temp[1], 1)))
645      goto fail;
646
647  fds[0] = temp[0];
648  fds[1] = temp[1];
649  return 0;
650
651fail:
652  uv__close(temp[0]);
653  uv__close(temp[1]);
654  return err;
655}
656