/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "uv.h"
#include "internal.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <sys/sysctl.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#if defined(__FreeBSD__)
#include <sys/user.h>
#endif
#include <unistd.h>
#include <fcntl.h>
#include <time.h>

/*
 * The EV_OOBAND fallback below is required on
 * - FreeBSD, until at least 11.0
 * - Older versions of Mac OS X
 *
 * http://www.boost.org/doc/libs/1_61_0/boost/asio/detail/kqueue_reactor.hpp
 */
#ifndef EV_OOBAND
#define EV_OOBAND  EV_FLAG1
#endif

static void uv__fs_event(uv_loop_t* loop, uv__io_t* w, unsigned int fflags);


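/* Create the kqueue that backs this event loop and mark the descriptor
 * close-on-exec so it is not leaked to child processes.
 */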
int uv__kqueue_init(uv_loop_t* loop) {
  loop->backend_fd = kqueue();
  if (loop->backend_fd == -1)
    return UV__ERR(errno);

  uv__cloexec(loop->backend_fd, 1);

  return 0;
}


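/* Set by uv__io_fork() when the process forks after the CFRunLoop-based
 * FSEvents machinery has been started. Once set, uv_fs_event_start() skips
 * FSEvents and always uses the kqueue fallback.
 */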
#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
static _Atomic int uv__has_forked_with_cfrunloop;
#endif

int uv__io_fork(uv_loop_t* loop) {
  int err;
  loop->backend_fd = -1;
  err = uv__kqueue_init(loop);
  if (err)
    return err;

#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  if (loop->cf_state != NULL) {
    /* We cannot start another CFRunLoop and/or thread in the child
       process; CF aborts if we try, and it also aborts if we touch the
       thread at all in order to kill it. So the best we can do is ignore
       it from now on. This means we can no longer watch directories the
       FSEvents way; we fall back to plain kqueue watchers, like the other
       BSDs. It also means we cannot properly clean up the allocated
       resources; calling uv__fsevents_loop_delete from uv_loop_close will
       crash the process. So we sidestep the issue by pretending we never
       started it in the first place.
    */
    atomic_store_explicit(&uv__has_forked_with_cfrunloop,
                          1,
                          memory_order_relaxed);
    uv__free(loop->cf_state);
    loop->cf_state = NULL;
  }
#endif /* #if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070 */
  return err;
}


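/* Probe whether `fd` can be watched with kqueue: register an EVFILT_READ
 * event for it and, if that succeeds, delete the event again. Returns 0 on
 * success or a negative errno value.
 */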
int uv__io_check_fd(uv_loop_t* loop, int fd) {
  struct kevent ev;
  int rc;

  rc = 0;
  EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, 0);
  if (kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL))
    rc = UV__ERR(errno);

  EV_SET(&ev, fd, EVFILT_READ, EV_DELETE, 0, 0, 0);
  if (rc == 0)
    if (kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL))
      abort();

  return rc;
}


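/* Remove an event from the kqueue. EBADF (the descriptor has already been
 * closed) and ENOENT (the event is already gone) are not errors; any other
 * failure is fatal.
 */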
static void uv__kqueue_delete(int kqfd, const struct kevent *ev) {
  struct kevent change;

  EV_SET(&change, ev->ident, ev->filter, EV_DELETE, 0, 0, 0);

  if (0 == kevent(kqfd, &change, 1, NULL, 0, NULL))
    return;

  if (errno == EBADF || errno == ENOENT)
    return;

  abort();
}


void uv__io_poll(uv_loop_t* loop, int timeout) {
  uv__loop_internal_fields_t* lfields;
  struct kevent events[1024];
  struct kevent* ev;
  struct timespec spec;
  unsigned int nevents;
  unsigned int revents;
  struct uv__queue* q;
  uv__io_t* w;
  uv_process_t* process;
  sigset_t* pset;
  sigset_t set;
  uint64_t base;
  uint64_t diff;
  int have_signals;
  int filter;
  int fflags;
  int count;
  int nfds;
  int fd;
  int op;
  int i;
  int user_timeout;
  int reset_timeout;

  if (loop->nfds == 0) {
    assert(uv__queue_empty(&loop->watcher_queue));
    return;
  }

  lfields = uv__get_internal_fields(loop);
  nevents = 0;

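  /* Flush pending watcher changes: register the filters each watcher is
   * newly interested in with EV_ADD. Changes are batched in `events` and
   * submitted with kevent() whenever the batch fills up.
   */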
  while (!uv__queue_empty(&loop->watcher_queue)) {
    q = uv__queue_head(&loop->watcher_queue);
    uv__queue_remove(q);
    uv__queue_init(q);

    w = uv__queue_data(q, uv__io_t, watcher_queue);
    assert(w->pevents != 0);
    assert(w->fd >= 0);
    assert(w->fd < (int) loop->nwatchers);

    if ((w->events & POLLIN) == 0 && (w->pevents & POLLIN) != 0) {
      filter = EVFILT_READ;
      fflags = 0;
      op = EV_ADD;

      if (w->cb == uv__fs_event) {
        filter = EVFILT_VNODE;
        fflags = NOTE_ATTRIB | NOTE_WRITE  | NOTE_RENAME
               | NOTE_DELETE | NOTE_EXTEND | NOTE_REVOKE;
        op = EV_ADD | EV_ONESHOT; /* Stop the event from firing repeatedly. */
      }

      EV_SET(events + nevents, w->fd, filter, op, fflags, 0, 0);

      if (++nevents == ARRAY_SIZE(events)) {
        if (kevent(loop->backend_fd, events, nevents, NULL, 0, NULL))
          abort();
        nevents = 0;
      }
    }

    if ((w->events & POLLOUT) == 0 && (w->pevents & POLLOUT) != 0) {
      EV_SET(events + nevents, w->fd, EVFILT_WRITE, EV_ADD, 0, 0, 0);

      if (++nevents == ARRAY_SIZE(events)) {
        if (kevent(loop->backend_fd, events, nevents, NULL, 0, NULL))
          abort();
        nevents = 0;
      }
    }

    if ((w->events & UV__POLLPRI) == 0 && (w->pevents & UV__POLLPRI) != 0) {
      EV_SET(events + nevents, w->fd, EV_OOBAND, EV_ADD, 0, 0, 0);

      if (++nevents == ARRAY_SIZE(events)) {
        if (kevent(loop->backend_fd, events, nevents, NULL, 0, NULL))
          abort();
        nevents = 0;
      }
    }

    w->events = w->pevents;
  }

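  /* When the loop was configured with uv_loop_configure(UV_LOOP_BLOCK_SIGNAL,
   * SIGPROF), block SIGPROF while waiting in kevent() so a sampling
   * profiler's timer signal does not cause spurious wakeups.
   */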
  pset = NULL;
  if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
    pset = &set;
    sigemptyset(pset);
    sigaddset(pset, SIGPROF);
  }

  assert(timeout >= -1);
  base = loop->time;
  count = 48; /* Benchmarks suggest this gives the best throughput. */

  if (lfields->flags & UV_METRICS_IDLE_TIME) {
    reset_timeout = 1;
    user_timeout = timeout;
    timeout = 0;
  } else {
    reset_timeout = 0;
  }

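  /* Wait for events. Each iteration blocks in kevent() for at most `timeout`
   * milliseconds; the loop runs again after a signal interruption, after the
   * zero-timeout pass used for idle-time metrics, and when the events array
   * came back completely full.
   */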
  for (;; nevents = 0) {
    /* Only need to set the provider_entry_time if timeout != 0. The function
     * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME.
     */
    if (timeout != 0)
      uv__metrics_set_provider_entry_time(loop);

    if (timeout != -1) {
      spec.tv_sec = timeout / 1000;
      spec.tv_nsec = (timeout % 1000) * 1000000;
    }

    if (pset != NULL)
      pthread_sigmask(SIG_BLOCK, pset, NULL);

    /* Store the current timeout in a location that's globally accessible so
     * other locations like uv__work_done() can determine whether the events
     * being processed in a callback were already waiting when poll was called.
     */
    lfields->current_timeout = timeout;

    nfds = kevent(loop->backend_fd,
                  events,
                  nevents,
                  events,
                  ARRAY_SIZE(events),
                  timeout == -1 ? NULL : &spec);

    if (nfds == -1)
      assert(errno == EINTR);
    else if (nfds == 0)
      /* Unlimited timeout should only return with events or signal. */
      assert(timeout != -1);

    if (pset != NULL)
      pthread_sigmask(SIG_UNBLOCK, pset, NULL);

    /* Update loop->time unconditionally. It's tempting to skip the update when
     * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
     * operating system didn't reschedule our process while in the syscall.
     */
    uv__update_time(loop);

    if (nfds == 0 || nfds == -1) {
      /* If kqueue is empty or interrupted, we might still have children ready
       * to reap immediately. */
      if (loop->flags & UV_LOOP_REAP_CHILDREN) {
        loop->flags &= ~UV_LOOP_REAP_CHILDREN;
        uv__wait_children(loop);
        assert((reset_timeout == 0 ? timeout : user_timeout) == 0);
        return; /* Equivalent to fall-through behavior. */
      }

      if (reset_timeout != 0) {
        timeout = user_timeout;
        reset_timeout = 0;
      } else if (nfds == 0) {
        return;
      }

      /* Interrupted by a signal. Update timeout and poll again. */
      goto update_timeout;
    }

    have_signals = 0;
    nevents = 0;

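    /* Stash the events array and result count where
     * uv__platform_invalidate_fd() can find them, so a callback that closes
     * a file descriptor can invalidate any of its events still pending in
     * this batch.
     */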
    assert(loop->watchers != NULL);
    loop->watchers[loop->nwatchers] = (void*) events;
    loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds;
    for (i = 0; i < nfds; i++) {
      ev = events + i;
      fd = ev->ident;

      /* Handle kevent NOTE_EXIT results */
      if (ev->filter == EVFILT_PROC) {
        uv__queue_foreach(q, &loop->process_handles) {
          process = uv__queue_data(q, uv_process_t, queue);
          if (process->pid == fd) {
            process->flags |= UV_HANDLE_REAP;
            loop->flags |= UV_LOOP_REAP_CHILDREN;
            break;
          }
        }
        nevents++;
        continue;
      }

      /* Skip invalidated events, see uv__platform_invalidate_fd */
      if (fd == -1)
        continue;
      w = loop->watchers[fd];

      if (w == NULL) {
        /* File descriptor that we've stopped watching, disarm it. */
        uv__kqueue_delete(loop->backend_fd, ev);
        continue;
      }

      if (ev->filter == EVFILT_VNODE) {
        assert(w->events == POLLIN);
        assert(w->pevents == POLLIN);
        uv__metrics_update_idle_time(loop);
        w->cb(loop, w, ev->fflags); /* XXX always uv__fs_event() */
        nevents++;
        continue;
      }

      revents = 0;

      if (ev->filter == EVFILT_READ) {
        if (w->pevents & POLLIN)
          revents |= POLLIN;
        else
          uv__kqueue_delete(loop->backend_fd, ev);

        if ((ev->flags & EV_EOF) && (w->pevents & UV__POLLRDHUP))
          revents |= UV__POLLRDHUP;
      }

      if (ev->filter == EV_OOBAND) {
        if (w->pevents & UV__POLLPRI)
          revents |= UV__POLLPRI;
        else
          uv__kqueue_delete(loop->backend_fd, ev);
      }

      if (ev->filter == EVFILT_WRITE) {
        if (w->pevents & POLLOUT)
          revents |= POLLOUT;
        else
          uv__kqueue_delete(loop->backend_fd, ev);
      }

      if (ev->flags & EV_ERROR)
        revents |= POLLERR;

      if (revents == 0)
        continue;

      /* Run signal watchers last.  This also affects child process watchers
       * because those are implemented in terms of signal watchers.
       */
      if (w == &loop->signal_io_watcher) {
        have_signals = 1;
      } else {
        uv__metrics_update_idle_time(loop);
        w->cb(loop, w, revents);
      }

      nevents++;
    }

    if (loop->flags & UV_LOOP_REAP_CHILDREN) {
      loop->flags &= ~UV_LOOP_REAP_CHILDREN;
      uv__wait_children(loop);
    }

    uv__metrics_inc_events(loop, nevents);
    if (reset_timeout != 0) {
      timeout = user_timeout;
      reset_timeout = 0;
      uv__metrics_inc_events_waiting(loop, nevents);
    }

    if (have_signals != 0) {
      uv__metrics_update_idle_time(loop);
      loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
    }

    loop->watchers[loop->nwatchers] = NULL;
    loop->watchers[loop->nwatchers + 1] = NULL;

    if (have_signals != 0)
      return;  /* Event loop should cycle now so don't poll again. */

    if (nevents != 0) {
      if (nfds == ARRAY_SIZE(events) && --count != 0) {
        /* Poll for more events but don't block this time. */
        timeout = 0;
        continue;
      }
      return;
    }

update_timeout:
    if (timeout == 0)
      return;

    if (timeout == -1)
      continue;

    assert(timeout > 0);

    diff = loop->time - base;
    if (diff >= (uint64_t) timeout)
      return;

    timeout -= diff;
  }
}


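/* Called when a file descriptor is closed while uv__io_poll() may still hold
 * unprocessed events for it in the batch returned by kevent(); those events
 * are marked invalid so the dispatch loop skips them.
 */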
void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
  struct kevent* events;
  uintptr_t i;
  uintptr_t nfds;

  assert(loop->watchers != NULL);
  assert(fd >= 0);

  events = (struct kevent*) loop->watchers[loop->nwatchers];
  nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1];
  if (events == NULL)
    return;

  /* Invalidate events with same file descriptor */
  for (i = 0; i < nfds; i++)
    if ((int) events[i].ident == fd && events[i].filter != EVFILT_PROC)
      events[i].ident = -1;
}


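/* Callback for the kqueue-based (fallback) fs event watcher. Maps the
 * EVFILT_VNODE fflags to UV_CHANGE or UV_RENAME, tries to recover the file
 * name for the user callback, and re-arms the one-shot vnode event.
 */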
static void uv__fs_event(uv_loop_t* loop, uv__io_t* w, unsigned int fflags) {
  uv_fs_event_t* handle;
  struct kevent ev;
  int events;
  const char* path;
#if defined(F_GETPATH)
  /* MAXPATHLEN == PATH_MAX but the former is what XNU calls it internally. */
  char pathbuf[MAXPATHLEN];
#endif

  handle = container_of(w, uv_fs_event_t, event_watcher);

  if (fflags & (NOTE_ATTRIB | NOTE_EXTEND))
    events = UV_CHANGE;
  else
    events = UV_RENAME;

  path = NULL;
#if defined(F_GETPATH)
  /* Also works when the file has been unlinked from the file system. Passing
   * in the path when the file has been deleted is arguably a little strange
   * but it's consistent with what the inotify backend does.
   */
  if (fcntl(handle->event_watcher.fd, F_GETPATH, pathbuf) == 0)
    path = uv__basename_r(pathbuf);
#elif defined(F_KINFO)
  /* We try to get the file path from the file descriptor via its kinfo_file
   * record. The struct's kf_structsize member must be initialised beforehand,
   * either with the KINFO_FILE_SIZE constant or as done below.
   */
  struct stat statbuf;
  struct kinfo_file kf;

  if (handle->event_watcher.fd != -1 &&
     (!uv__fstat(handle->event_watcher.fd, &statbuf) && !(statbuf.st_mode & S_IFDIR))) {
     /* We deliberately do not use the KINFO_FILE_SIZE constant here: it is
      * not available on non-Intel architectures, and on Intel it evaluates
      * to the same 1392 bytes that sizeof() yields anyway. The man page also
      * notes that initialising kf_structsize this way is acceptable.
      */
     kf.kf_structsize = sizeof(kf);
     if (fcntl(handle->event_watcher.fd, F_KINFO, &kf) == 0)
       path = uv__basename_r(kf.kf_path);
  }
#endif
  handle->cb(handle, path, events, 0);

  if (handle->event_watcher.fd == -1)
    return;

  /* Watcher operates in one-shot mode, re-arm it. */
  fflags = NOTE_ATTRIB | NOTE_WRITE  | NOTE_RENAME
         | NOTE_DELETE | NOTE_EXTEND | NOTE_REVOKE;

  EV_SET(&ev, w->fd, EVFILT_VNODE, EV_ADD | EV_ONESHOT, fflags, 0, 0);

  if (kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL))
    abort();
}


int uv_fs_event_init(uv_loop_t* loop, uv_fs_event_t* handle) {
  uv__handle_init(loop, (uv_handle_t*)handle, UV_FS_EVENT);
  return 0;
}


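/* Start watching `path`. On macOS, directories are watched with the
 * FSEvents-based watcher when possible (not after forking with a running
 * CFRunLoop, see uv__io_fork); everything else falls back to a one-shot
 * EVFILT_VNODE kqueue watcher on an O_RDONLY file descriptor.
 */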
int uv_fs_event_start(uv_fs_event_t* handle,
                      uv_fs_event_cb cb,
                      const char* path,
                      unsigned int flags) {
  int fd;
#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  struct stat statbuf;
#endif

  if (uv__is_active(handle))
    return UV_EINVAL;

  handle->cb = cb;
  handle->path = uv__strdup(path);
  if (handle->path == NULL)
    return UV_ENOMEM;

  /* TODO open asynchronously - but how do we report back errors? */
  fd = open(handle->path, O_RDONLY);
  if (fd == -1) {
    uv__free(handle->path);
    handle->path = NULL;
    return UV__ERR(errno);
  }

#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  /* Nullify these fields to perform checks later (see uv_fs_event_stop). */
  handle->cf_cb = NULL;
  handle->realpath = NULL;
  handle->realpath_len = 0;
  handle->cf_flags = flags;

  if (uv__fstat(fd, &statbuf))
    goto fallback;
  /* FSEvents works only with directories */
  if (!(statbuf.st_mode & S_IFDIR))
    goto fallback;

  if (0 == atomic_load_explicit(&uv__has_forked_with_cfrunloop,
                                memory_order_relaxed)) {
    int r;
    /* The fallback fd is no longer needed */
    uv__close_nocheckstdio(fd);
    handle->event_watcher.fd = -1;
    r = uv__fsevents_init(handle);
    if (r == 0) {
      uv__handle_start(handle);
    } else {
      uv__free(handle->path);
      handle->path = NULL;
    }
    return r;
  }
fallback:
#endif /* #if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070 */

  uv__handle_start(handle);
  uv__io_init(&handle->event_watcher, uv__fs_event, fd);
  uv__io_start(handle->loop, &handle->event_watcher, POLLIN);

  return 0;
}


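/* Stop the watcher: tear down the FSEvents state if it was started, close
 * the kqueue fallback file descriptor if one is open, and free the stored
 * path.
 */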
int uv_fs_event_stop(uv_fs_event_t* handle) {
  int r;
  r = 0;

  if (!uv__is_active(handle))
    return 0;

  uv__handle_stop(handle);

#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED >= 1070
  if (0 == atomic_load_explicit(&uv__has_forked_with_cfrunloop,
                                memory_order_relaxed))
    if (handle->cf_cb != NULL)
      r = uv__fsevents_close(handle);
#endif

  if (handle->event_watcher.fd != -1) {
    uv__io_close(handle->loop, &handle->event_watcher);
    uv__close(handle->event_watcher.fd);
    handle->event_watcher.fd = -1;
  }

  uv__free(handle->path);
  handle->path = NULL;

  return r;
}


void uv__fs_event_close(uv_fs_event_t* handle) {
  uv_fs_event_stop(handle);
}