1 /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
2 * Permission is hereby granted, free of charge, to any person obtaining a copy
3 * of this software and associated documentation files (the "Software"), to
4 * deal in the Software without restriction, including without limitation the
5 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
6 * sell copies of the Software, and to permit persons to whom the Software is
7 * furnished to do so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in
10 * all copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
17 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
18 * IN THE SOFTWARE.
19 */
20
21 /* We lean on the fact that POLL{IN,OUT,ERR,HUP} correspond with their
22 * EPOLL* counterparts. We use the POLL* variants in this file because that
23 * is what libuv uses elsewhere.
24 */
25
26 #include "uv.h"
27 #include "internal.h"
28 #include "uv_log.h"
29 #include <inttypes.h>
30 #include <stdatomic.h>
31 #include <stddef.h> /* offsetof */
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <assert.h>
37 #include <errno.h>
38
39 #include <fcntl.h>
40 #include <ifaddrs.h>
41 #include <net/ethernet.h>
42 #include <net/if.h>
43 #include <netpacket/packet.h>
44 #include <sys/epoll.h>
45 #include <sys/inotify.h>
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/prctl.h>
49 #include <sys/socket.h>
50 #include <sys/stat.h>
51 #include <sys/syscall.h>
52 #include <sys/sysinfo.h>
53 #include <sys/sysmacros.h>
54 #include <sys/types.h>
55 #include <sys/utsname.h>
56 #include <time.h>
57 #include <unistd.h>
58
59 #ifdef USE_FFRT
60 #include "ffrt.h"
61 #include "c/executor_task.h"
62
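/* FFRT-aware replacement for epoll_pwait(): when running on an FFRT task,
 * wait through ffrt_epoll_wait() at the task's QoS level; returns 0 when not
 * called from an FFRT task.
 */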
63 int uv__epoll_wait(struct epoll_event* events, int eventsize, uint64_t timeout) {
64 int nfds = 0;
65 if (ffrt_get_cur_task() != NULL) {
66 ffrt_qos_t qos = ffrt_this_task_get_qos();
67 nfds = ffrt_epoll_wait(qos, events, eventsize, timeout);
68 }
69 return nfds;
70 }
71 #endif
72
73 int uv__epoll_ctl(int epoll_fd, int op, int fd, struct epoll_event* event) {
74 #ifdef USE_FFRT
75 if (ffrt_get_cur_task() != NULL) {
76 ffrt_qos_t qos = ffrt_this_task_get_qos();
77 return ffrt_epoll_ctl(qos, op, fd, event == NULL ? 0 : event->events, NULL, NULL);
78 }
79 #endif
80   return epoll_ctl(epoll_fd, op, fd, event);
81 }
82 #ifndef __NR_io_uring_setup
83 # define __NR_io_uring_setup 425
84 #endif
85
86 #ifndef __NR_io_uring_enter
87 # define __NR_io_uring_enter 426
88 #endif
89
90 #ifndef __NR_io_uring_register
91 # define __NR_io_uring_register 427
92 #endif
93
94 #ifndef __NR_copy_file_range
95 # if defined(__x86_64__)
96 # define __NR_copy_file_range 326
97 # elif defined(__i386__)
98 # define __NR_copy_file_range 377
99 # elif defined(__s390__)
100 # define __NR_copy_file_range 375
101 # elif defined(__arm__)
102 # define __NR_copy_file_range 391
103 # elif defined(__aarch64__)
104 # define __NR_copy_file_range 285
105 # elif defined(__powerpc__)
106 # define __NR_copy_file_range 379
107 # elif defined(__arc__)
108 # define __NR_copy_file_range 285
109 # elif defined(__riscv)
110 # define __NR_copy_file_range 285
111 # endif
112 #endif /* __NR_copy_file_range */
113
114 #ifndef __NR_statx
115 # if defined(__x86_64__)
116 # define __NR_statx 332
117 # elif defined(__i386__)
118 # define __NR_statx 383
119 # elif defined(__aarch64__)
120 # define __NR_statx 397
121 # elif defined(__arm__)
122 # define __NR_statx 397
123 # elif defined(__ppc__)
124 # define __NR_statx 383
125 # elif defined(__s390__)
126 # define __NR_statx 379
127 # elif defined(__riscv)
128 # define __NR_statx 291
129 # endif
130 #endif /* __NR_statx */
131
132 #ifndef __NR_getrandom
133 # if defined(__x86_64__)
134 # define __NR_getrandom 318
135 # elif defined(__i386__)
136 # define __NR_getrandom 355
137 # elif defined(__aarch64__)
138 # define __NR_getrandom 384
139 # elif defined(__arm__)
140 # define __NR_getrandom 384
141 # elif defined(__ppc__)
142 # define __NR_getrandom 359
143 # elif defined(__s390__)
144 # define __NR_getrandom 349
145 # elif defined(__riscv)
146 # define __NR_getrandom 278
147 # endif
148 #endif /* __NR_getrandom */
149
150 enum {
151 UV__IORING_SETUP_SQPOLL = 2u,
152 };
153
154 enum {
155 UV__IORING_FEAT_SINGLE_MMAP = 1u,
156 UV__IORING_FEAT_NODROP = 2u,
157 UV__IORING_FEAT_RSRC_TAGS = 1024u, /* linux v5.13 */
158 };
159
160 enum {
161 UV__IORING_OP_READV = 1,
162 UV__IORING_OP_WRITEV = 2,
163 UV__IORING_OP_FSYNC = 3,
164 UV__IORING_OP_OPENAT = 18,
165 UV__IORING_OP_CLOSE = 19,
166 UV__IORING_OP_STATX = 21,
167 UV__IORING_OP_EPOLL_CTL = 29,
168 UV__IORING_OP_RENAMEAT = 35,
169 UV__IORING_OP_UNLINKAT = 36,
170 UV__IORING_OP_MKDIRAT = 37,
171 UV__IORING_OP_SYMLINKAT = 38,
172 UV__IORING_OP_LINKAT = 39,
173 };
174
175 enum {
176 UV__IORING_ENTER_GETEVENTS = 1u,
177 UV__IORING_ENTER_SQ_WAKEUP = 2u,
178 };
179
180 enum {
181 UV__IORING_SQ_NEED_WAKEUP = 1u,
182 UV__IORING_SQ_CQ_OVERFLOW = 2u,
183 };
184
185 enum {
186 UV__MKDIRAT_SYMLINKAT_LINKAT = 1u,
187 };
188
189 struct uv__io_cqring_offsets {
190 uint32_t head;
191 uint32_t tail;
192 uint32_t ring_mask;
193 uint32_t ring_entries;
194 uint32_t overflow;
195 uint32_t cqes;
196 uint64_t reserved0;
197 uint64_t reserved1;
198 };
199
200 STATIC_ASSERT(40 == sizeof(struct uv__io_cqring_offsets));
201
202 struct uv__io_sqring_offsets {
203 uint32_t head;
204 uint32_t tail;
205 uint32_t ring_mask;
206 uint32_t ring_entries;
207 uint32_t flags;
208 uint32_t dropped;
209 uint32_t array;
210 uint32_t reserved0;
211 uint64_t reserved1;
212 };
213
214 STATIC_ASSERT(40 == sizeof(struct uv__io_sqring_offsets));
215
216 struct uv__io_uring_cqe {
217 uint64_t user_data;
218 int32_t res;
219 uint32_t flags;
220 };
221
222 STATIC_ASSERT(16 == sizeof(struct uv__io_uring_cqe));
223
224 struct uv__io_uring_sqe {
225 uint8_t opcode;
226 uint8_t flags;
227 uint16_t ioprio;
228 int32_t fd;
229 union {
230 uint64_t off;
231 uint64_t addr2;
232 };
233 union {
234 uint64_t addr;
235 };
236 uint32_t len;
237 union {
238 uint32_t rw_flags;
239 uint32_t fsync_flags;
240 uint32_t open_flags;
241 uint32_t statx_flags;
242 };
243 uint64_t user_data;
244 union {
245 uint16_t buf_index;
246 uint64_t pad[3];
247 };
248 };
249
250 STATIC_ASSERT(64 == sizeof(struct uv__io_uring_sqe));
251 STATIC_ASSERT(0 == offsetof(struct uv__io_uring_sqe, opcode));
252 STATIC_ASSERT(1 == offsetof(struct uv__io_uring_sqe, flags));
253 STATIC_ASSERT(2 == offsetof(struct uv__io_uring_sqe, ioprio));
254 STATIC_ASSERT(4 == offsetof(struct uv__io_uring_sqe, fd));
255 STATIC_ASSERT(8 == offsetof(struct uv__io_uring_sqe, off));
256 STATIC_ASSERT(16 == offsetof(struct uv__io_uring_sqe, addr));
257 STATIC_ASSERT(24 == offsetof(struct uv__io_uring_sqe, len));
258 STATIC_ASSERT(28 == offsetof(struct uv__io_uring_sqe, rw_flags));
259 STATIC_ASSERT(32 == offsetof(struct uv__io_uring_sqe, user_data));
260 STATIC_ASSERT(40 == offsetof(struct uv__io_uring_sqe, buf_index));
261
262 struct uv__io_uring_params {
263 uint32_t sq_entries;
264 uint32_t cq_entries;
265 uint32_t flags;
266 uint32_t sq_thread_cpu;
267 uint32_t sq_thread_idle;
268 uint32_t features;
269 uint32_t reserved[4];
270 struct uv__io_sqring_offsets sq_off; /* 40 bytes */
271 struct uv__io_cqring_offsets cq_off; /* 40 bytes */
272 };
273
274 STATIC_ASSERT(40 + 40 + 40 == sizeof(struct uv__io_uring_params));
275 STATIC_ASSERT(40 == offsetof(struct uv__io_uring_params, sq_off));
276 STATIC_ASSERT(80 == offsetof(struct uv__io_uring_params, cq_off));
277
278 STATIC_ASSERT(EPOLL_CTL_ADD < 4);
279 STATIC_ASSERT(EPOLL_CTL_DEL < 4);
280 STATIC_ASSERT(EPOLL_CTL_MOD < 4);
281
282 struct watcher_list {
283 RB_ENTRY(watcher_list) entry;
284 struct uv__queue watchers;
285 int iterating;
286 char* path;
287 int wd;
288 };
289
290 struct watcher_root {
291 struct watcher_list* rbh_root;
292 };
293
294 static int uv__inotify_fork(uv_loop_t* loop, struct watcher_list* root);
295 static void uv__inotify_read(uv_loop_t* loop,
296 uv__io_t* w,
297 unsigned int revents);
298 static int compare_watchers(const struct watcher_list* a,
299 const struct watcher_list* b);
300 static void maybe_free_watcher_list(struct watcher_list* w,
301 uv_loop_t* loop);
302
303 static void uv__epoll_ctl_flush(int epollfd,
304 struct uv__iou* ctl,
305 struct epoll_event (*events)[256]);
306
307 static void uv__epoll_ctl_prep(int epollfd,
308 struct uv__iou* ctl,
309 struct epoll_event (*events)[256],
310 int op,
311 int fd,
312 struct epoll_event* e);
313
314 RB_GENERATE_STATIC(watcher_root, watcher_list, entry, compare_watchers)
315
316
317 static struct watcher_root* uv__inotify_watchers(uv_loop_t* loop) {
318 /* This cast works because watcher_root is a struct with a pointer as its
319 * sole member. Such type punning is unsafe in the presence of strict
320 * pointer aliasing (and is just plain nasty) but that is why libuv
321 * is compiled with -fno-strict-aliasing.
322 */
323 return (struct watcher_root*) &loop->inotify_watchers;
324 }
325
326
327 unsigned uv__kernel_version(void) {
328 static _Atomic unsigned cached_version;
329 struct utsname u;
330 unsigned version;
331 unsigned major;
332 unsigned minor;
333 unsigned patch;
334 char v_sig[256];
335 char* needle;
336
337 version = atomic_load_explicit(&cached_version, memory_order_relaxed);
338 if (version != 0)
339 return version;
340
341 /* Check /proc/version_signature first as it's the way to get the mainline
342 * kernel version in Ubuntu. The format is:
343 * Ubuntu ubuntu_kernel_version mainline_kernel_version
344 * For example:
345 * Ubuntu 5.15.0-79.86-generic 5.15.111
346 */
347 if (0 == uv__slurp("/proc/version_signature", v_sig, sizeof(v_sig)))
348 if (3 == sscanf(v_sig, "Ubuntu %*s %u.%u.%u", &major, &minor, &patch))
349 goto calculate_version;
350
351 if (-1 == uname(&u))
352 return 0;
353
354 /* In Debian we need to check `version` instead of `release` to extract the
355  * mainline kernel version. This is an example of what it looks like:
356 * #1 SMP Debian 5.10.46-4 (2021-08-03)
357 */
358 needle = strstr(u.version, "Debian ");
359 if (needle != NULL)
360 if (3 == sscanf(needle, "Debian %u.%u.%u", &major, &minor, &patch))
361 goto calculate_version;
362
363 if (3 != sscanf(u.release, "%u.%u.%u", &major, &minor, &patch))
364 return 0;
365
366 /* Handle it when the process runs under the UNAME26 personality:
367 *
368 * - kernels >= 3.x identify as 2.6.40+x
369 * - kernels >= 4.x identify as 2.6.60+x
370 *
371 * UNAME26 is a poorly conceived hack that doesn't let us distinguish
372 * between 4.x kernels and 5.x/6.x kernels so we conservatively assume
373 * that 2.6.60+x means 4.x.
374 *
375 * Fun fact of the day: it's technically possible to observe the actual
376 * kernel version for a brief moment because uname() first copies out the
377 * real release string before overwriting it with the backcompat string.
378 */
379 if (major == 2 && minor == 6) {
380 if (patch >= 60) {
381 major = 4;
382 minor = patch - 60;
383 patch = 0;
384 } else if (patch >= 40) {
385 major = 3;
386 minor = patch - 40;
387 patch = 0;
388 }
389 }
390
391 calculate_version:
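  /* Pack major.minor.patch into a single integer, e.g. 5.15.111 -> 0x050F6F. */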
392 version = major * 65536 + minor * 256 + patch;
393 atomic_store_explicit(&cached_version, version, memory_order_relaxed);
394
395 return version;
396 }
397
398
399 ssize_t
400 uv__fs_copy_file_range(int fd_in,
401 off_t* off_in,
402 int fd_out,
403 off_t* off_out,
404 size_t len,
405 unsigned int flags)
406 {
407 #ifdef __NR_copy_file_range
408 return syscall(__NR_copy_file_range,
409 fd_in,
410 off_in,
411 fd_out,
412 off_out,
413 len,
414 flags);
415 #else
416 return errno = ENOSYS, -1;
417 #endif
418 }
419
420
421 int uv__statx(int dirfd,
422 const char* path,
423 int flags,
424 unsigned int mask,
425 struct uv__statx* statxbuf) {
426 #if !defined(__NR_statx) || defined(__ANDROID_API__) && __ANDROID_API__ < 30
427 return errno = ENOSYS, -1;
428 #else
429 int rc;
430
431 rc = syscall(__NR_statx, dirfd, path, flags, mask, statxbuf);
432 if (rc >= 0)
433 uv__msan_unpoison(statxbuf, sizeof(*statxbuf));
434
435 return rc;
436 #endif
437 }
438
439
440 ssize_t uv__getrandom(void* buf, size_t buflen, unsigned flags) {
441 #if !defined(__NR_getrandom) || defined(__ANDROID_API__) && __ANDROID_API__ < 28
442 return errno = ENOSYS, -1;
443 #else
444 ssize_t rc;
445
446 rc = syscall(__NR_getrandom, buf, buflen, flags);
447 if (rc >= 0)
448 uv__msan_unpoison(buf, buflen);
449
450 return rc;
451 #endif
452 }
453
454
455 int uv__io_uring_setup(int entries, struct uv__io_uring_params* params) {
456 return syscall(__NR_io_uring_setup, entries, params);
457 }
458
459
460 int uv__io_uring_enter(int fd,
461 unsigned to_submit,
462 unsigned min_complete,
463 unsigned flags) {
464 /* io_uring_enter used to take a sigset_t but it's unused
465 * in newer kernels unless IORING_ENTER_EXT_ARG is set,
466 * in which case it takes a struct io_uring_getevents_arg.
467 */
468 return syscall(__NR_io_uring_enter,
469 fd,
470 to_submit,
471 min_complete,
472 flags,
473 NULL,
474 0L);
475 }
476
477
478 int uv__io_uring_register(int fd, unsigned opcode, void* arg, unsigned nargs) {
479 return syscall(__NR_io_uring_register, fd, opcode, arg, nargs);
480 }
481
482
483 static int uv__use_io_uring(void) {
484 #if defined(USE_OHOS_DFX)
485 return 0;
486 #endif
487 #if defined(__ANDROID_API__)
488 return 0; /* Possibly available but blocked by seccomp. */
489 #elif defined(__arm__) && __SIZEOF_POINTER__ == 4
490 /* See https://github.com/libuv/libuv/issues/4158. */
491 return 0; /* All 32 bits kernels appear buggy. */
492 #elif defined(__powerpc64__) || defined(__ppc64__)
493 /* See https://github.com/libuv/libuv/issues/4283. */
494 return 0; /* Random SIGSEGV in signal handler. */
495 #else
496 /* Ternary: unknown=0, yes=1, no=-1 */
497 static _Atomic int use_io_uring;
498 char* val;
499 int use;
500
501 use = atomic_load_explicit(&use_io_uring, memory_order_relaxed);
502
503 if (use == 0) {
504 use = uv__kernel_version() >=
505 #if defined(__hppa__)
506 /* io_uring first supported on parisc in 6.1, functional in .51 */
507 /* https://lore.kernel.org/all/cb912694-b1fe-dbb0-4d8c-d608f3526905@gmx.de/ */
508 /* 6.1.51 */ 0x060133
509 #else
510 /* Older kernels have a bug where the sqpoll thread uses 100% CPU. */
511 /* 5.10.186 */ 0x050ABA
512 #endif
513 ? 1 : -1;
514
515 /* But users can still enable it if they so desire. */
516 val = getenv("UV_USE_IO_URING");
517 if (val != NULL)
518 use = atoi(val) ? 1 : -1;
519
520 atomic_store_explicit(&use_io_uring, use, memory_order_relaxed);
521 }
522
523 return use > 0;
524 #endif
525 }
526
527
528 static void uv__iou_init(int epollfd,
529 struct uv__iou* iou,
530 uint32_t entries,
531 uint32_t flags) {
532 struct uv__io_uring_params params;
533 struct epoll_event e;
534 size_t cqlen;
535 size_t sqlen;
536 size_t maxlen;
537 size_t sqelen;
538 uint32_t i;
539 char* sq;
540 char* sqe;
541 int ringfd;
542
543 sq = MAP_FAILED;
544 sqe = MAP_FAILED;
545
546 if (!uv__use_io_uring())
547 return;
548
549 /* SQPOLL required CAP_SYS_NICE until linux v5.12 relaxed that requirement.
550 * Mostly academic because we check for a v5.13 kernel afterwards anyway.
551 */
552   memset(&params, 0, sizeof(params));
553 params.flags = flags;
554
555 if (flags & UV__IORING_SETUP_SQPOLL)
556 params.sq_thread_idle = 10; /* milliseconds */
557
558 /* Kernel returns a file descriptor with O_CLOEXEC flag set. */
559   ringfd = uv__io_uring_setup(entries, &params);
560 if (ringfd == -1)
561 return;
562
563 /* IORING_FEAT_RSRC_TAGS is used to detect linux v5.13 but what we're
564 * actually detecting is whether IORING_OP_STATX works with SQPOLL.
565 */
566 if (!(params.features & UV__IORING_FEAT_RSRC_TAGS))
567 goto fail;
568
569 /* Implied by IORING_FEAT_RSRC_TAGS but checked explicitly anyway. */
570 if (!(params.features & UV__IORING_FEAT_SINGLE_MMAP))
571 goto fail;
572
573 /* Implied by IORING_FEAT_RSRC_TAGS but checked explicitly anyway. */
574 if (!(params.features & UV__IORING_FEAT_NODROP))
575 goto fail;
576
577 sqlen = params.sq_off.array + params.sq_entries * sizeof(uint32_t);
578 cqlen =
579 params.cq_off.cqes + params.cq_entries * sizeof(struct uv__io_uring_cqe);
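  /* With IORING_FEAT_SINGLE_MMAP the SQ and CQ rings share one mapping, so
   * map the larger of the two. */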
580 maxlen = sqlen < cqlen ? cqlen : sqlen;
581 sqelen = params.sq_entries * sizeof(struct uv__io_uring_sqe);
582
583 sq = mmap(0,
584 maxlen,
585 PROT_READ | PROT_WRITE,
586 MAP_SHARED | MAP_POPULATE,
587 ringfd,
588 0); /* IORING_OFF_SQ_RING */
589
590 sqe = mmap(0,
591 sqelen,
592 PROT_READ | PROT_WRITE,
593 MAP_SHARED | MAP_POPULATE,
594 ringfd,
595 0x10000000ull); /* IORING_OFF_SQES */
596
597 if (sq == MAP_FAILED || sqe == MAP_FAILED)
598 goto fail;
599
600 if (flags & UV__IORING_SETUP_SQPOLL) {
601 /* Only interested in completion events. To get notified when
602 * the kernel pulls items from the submission ring, add POLLOUT.
603 */
604 memset(&e, 0, sizeof(e));
605 e.events = POLLIN;
606 e.data.fd = ringfd;
607
608 if (uv__epoll_ctl(epollfd, EPOLL_CTL_ADD, ringfd, &e))
609 goto fail;
610 }
611
612 iou->sqhead = (uint32_t*) (sq + params.sq_off.head);
613 iou->sqtail = (uint32_t*) (sq + params.sq_off.tail);
614 iou->sqmask = *(uint32_t*) (sq + params.sq_off.ring_mask);
615 iou->sqarray = (uint32_t*) (sq + params.sq_off.array);
616 iou->sqflags = (uint32_t*) (sq + params.sq_off.flags);
617 iou->cqhead = (uint32_t*) (sq + params.cq_off.head);
618 iou->cqtail = (uint32_t*) (sq + params.cq_off.tail);
619 iou->cqmask = *(uint32_t*) (sq + params.cq_off.ring_mask);
620 iou->sq = sq;
621 iou->cqe = sq + params.cq_off.cqes;
622 iou->sqe = sqe;
623 iou->sqlen = sqlen;
624 iou->cqlen = cqlen;
625 iou->maxlen = maxlen;
626 iou->sqelen = sqelen;
627 iou->ringfd = ringfd;
628 iou->in_flight = 0;
629 iou->flags = 0;
630
631 if (uv__kernel_version() >= /* 5.15.0 */ 0x050F00)
632 iou->flags |= UV__MKDIRAT_SYMLINKAT_LINKAT;
633
634 for (i = 0; i <= iou->sqmask; i++)
635 iou->sqarray[i] = i; /* Slot -> sqe identity mapping. */
636
637 return;
638
639 fail:
640 if (sq != MAP_FAILED)
641 munmap(sq, maxlen);
642
643 if (sqe != MAP_FAILED)
644 munmap(sqe, sqelen);
645
646 uv__close(ringfd);
647 }
648
649
650 static void uv__iou_delete(struct uv__iou* iou) {
651 if (iou->ringfd != -1) {
652 munmap(iou->sq, iou->maxlen);
653 munmap(iou->sqe, iou->sqelen);
654 uv__close(iou->ringfd);
655 iou->ringfd = -1;
656 }
657 }
658
659
660 int uv__platform_loop_init(uv_loop_t* loop) {
661 uv__loop_internal_fields_t* lfields;
662
663 lfields = uv__get_internal_fields(loop);
664 lfields->ctl.ringfd = -1;
665 lfields->iou.ringfd = -1;
666
667 loop->inotify_watchers = NULL;
668 loop->inotify_fd = -1;
669 loop->backend_fd = epoll_create1(O_CLOEXEC);
670 #ifdef USE_OHOS_DFX
671 fdsan_exchange_owner_tag(loop->backend_fd, 0, uv__get_addr_tag((void *)&loop->backend_fd));
672 #endif
673 if (loop->backend_fd == -1)
674 return UV__ERR(errno);
675
676 uv__iou_init(loop->backend_fd, &lfields->iou, 64, UV__IORING_SETUP_SQPOLL);
677 uv__iou_init(loop->backend_fd, &lfields->ctl, 256, 0);
678 UV_LOGI("init:%{public}zu, backend_fd:%{public}d", (size_t)loop, loop->backend_fd);
679 return 0;
680 }
681
682
683 int uv__io_fork(uv_loop_t* loop) {
684 int err;
685 struct watcher_list* root;
686
687 root = uv__inotify_watchers(loop)->rbh_root;
688 #ifdef USE_OHOS_DFX
689 fdsan_close_with_tag(loop->backend_fd, uv__get_addr_tag((void *)&loop->backend_fd));
690 #else
691 uv__close(loop->backend_fd);
692 #endif
693 loop->backend_fd = -1;
694
695 /* TODO(bnoordhuis) Loses items from the submission and completion rings. */
696 uv__platform_loop_delete(loop);
697
698 err = uv__platform_loop_init(loop);
699 if (err)
700 return err;
701
702 return uv__inotify_fork(loop, root);
703 }
704
705
706 void uv__platform_loop_delete(uv_loop_t* loop) {
707 uv__loop_internal_fields_t* lfields;
708
709 lfields = uv__get_internal_fields(loop);
710 uv__iou_delete(&lfields->ctl);
711 uv__iou_delete(&lfields->iou);
712
713 if (loop->inotify_fd != -1) {
714 uv__io_stop(loop, &loop->inotify_read_watcher, POLLIN);
715 uv__close(loop->inotify_fd);
716 loop->inotify_fd = -1;
717 }
718 }
719
720
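/* Bookkeeping shared with uv__platform_invalidate_fd() so that events for
 * file descriptors closed mid-iteration can be invalidated before their
 * callbacks run. */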
721 struct uv__invalidate {
722 struct epoll_event (*prep)[256];
723 struct epoll_event* events;
724 int nfds;
725 };
726
727
728 void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
729 uv__loop_internal_fields_t* lfields;
730 struct uv__invalidate* inv;
731 struct epoll_event dummy;
732 int i;
733
734 lfields = uv__get_internal_fields(loop);
735 inv = lfields->inv;
736
737 /* Invalidate events with same file descriptor */
738 if (inv != NULL)
739 for (i = 0; i < inv->nfds; i++)
740 if (inv->events[i].data.fd == fd)
741 inv->events[i].data.fd = -1;
742
743 /* Remove the file descriptor from the epoll.
744 * This avoids a problem where the same file description remains open
745 * in another process, causing repeated junk epoll events.
746 *
747 * We pass in a dummy epoll_event, to work around a bug in old kernels.
748 *
749 * Work around a bug in kernels 3.10 to 3.19 where passing a struct that
750 * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings.
751 */
752 memset(&dummy, 0, sizeof(dummy));
753
754 if (inv == NULL) {
755 uv__epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy);
756 } else {
757 uv__epoll_ctl_prep(loop->backend_fd,
758 &lfields->ctl,
759 inv->prep,
760 EPOLL_CTL_DEL,
761 fd,
762 &dummy);
763 }
764 }
765
766
767 int uv__io_check_fd(uv_loop_t* loop, int fd) {
768 struct epoll_event e;
769 int rc;
770
771 memset(&e, 0, sizeof(e));
772 e.events = POLLIN;
773 e.data.fd = -1;
774
775 rc = 0;
776 if (uv__epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e))
777 if (errno != EEXIST)
778 rc = UV__ERR(errno);
779
780 if (rc == 0)
781 if (uv__epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e))
782 abort();
783
784 return rc;
785 }
786
787
788 /* Caller must initialize SQE and call uv__iou_submit(). */
789 static struct uv__io_uring_sqe* uv__iou_get_sqe(struct uv__iou* iou,
790 uv_loop_t* loop,
791 uv_fs_t* req) {
792 struct uv__io_uring_sqe* sqe;
793 uint32_t head;
794 uint32_t tail;
795 uint32_t mask;
796 uint32_t slot;
797
798 if (iou->ringfd == -1)
799 return NULL;
800
801 head = atomic_load_explicit((_Atomic uint32_t*) iou->sqhead,
802 memory_order_acquire);
803 tail = *iou->sqtail;
804 mask = iou->sqmask;
805
806 if ((head & mask) == ((tail + 1) & mask))
807 return NULL; /* No room in ring buffer. TODO(bnoordhuis) maybe flush it? */
808
809 slot = tail & mask;
810 sqe = iou->sqe;
811 sqe = &sqe[slot];
812 memset(sqe, 0, sizeof(*sqe));
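  /* user_data is echoed back in the CQE; uv__poll_io_uring() casts it back
   * to the originating request. */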
813 sqe->user_data = (uintptr_t) req;
814
815 /* Pacify uv_cancel(). */
816 req->work_req.loop = loop;
817 req->work_req.work = NULL;
818 req->work_req.done = NULL;
819 uv__queue_init(&req->work_req.wq);
820
821 uv__req_register(loop, req);
822 iou->in_flight++;
823
824 return sqe;
825 }
826
827
828 static void uv__iou_submit(struct uv__iou* iou) {
829 uint32_t flags;
830
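  /* Publish the new tail with release semantics so the kernel observes a
   * fully written SQE before consuming it. */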
831 atomic_store_explicit((_Atomic uint32_t*) iou->sqtail,
832 *iou->sqtail + 1,
833 memory_order_release);
834
835 flags = atomic_load_explicit((_Atomic uint32_t*) iou->sqflags,
836 memory_order_acquire);
837
838 if (flags & UV__IORING_SQ_NEED_WAKEUP)
839 if (uv__io_uring_enter(iou->ringfd, 0, 0, UV__IORING_ENTER_SQ_WAKEUP))
840 if (errno != EOWNERDEAD) /* Kernel bug. Harmless, ignore. */
841 perror("libuv: io_uring_enter(wakeup)"); /* Can't happen. */
842 }
843
844
845 int uv__iou_fs_close(uv_loop_t* loop, uv_fs_t* req) {
846 struct uv__io_uring_sqe* sqe;
847 struct uv__iou* iou;
848 int kv;
849
850 kv = uv__kernel_version();
851 /* Work around a poorly understood bug in older kernels where closing a file
852 * descriptor pointing to /foo/bar results in ETXTBSY errors when trying to
853 * execve("/foo/bar") later on. The bug seems to have been fixed somewhere
854 * between 5.15.85 and 5.15.90. I couldn't pinpoint the responsible commit
855 * but good candidates are the several data race fixes. Interestingly, it
856 * seems to manifest only when running under Docker so the possibility of
857 * a Docker bug can't be completely ruled out either. Yay, computers.
858 * Also, disable on non-longterm versions between 5.16.0 (non-longterm) and
859 * 6.1.0 (longterm). Starting with longterm 6.1.x, the issue seems to be
860 * solved.
861 */
862 if (kv < /* 5.15.90 */ 0x050F5A)
863 return 0;
864
865   if (kv >= /* 5.16.0 */ 0x051000 && kv < /* 6.1.0 */ 0x060100)
866 return 0;
867
868
869 iou = &uv__get_internal_fields(loop)->iou;
870
871 sqe = uv__iou_get_sqe(iou, loop, req);
872 if (sqe == NULL)
873 return 0;
874
875 sqe->fd = req->file;
876 sqe->opcode = UV__IORING_OP_CLOSE;
877
878 uv__iou_submit(iou);
879
880 return 1;
881 }
882
883
884 int uv__iou_fs_fsync_or_fdatasync(uv_loop_t* loop,
885 uv_fs_t* req,
886 uint32_t fsync_flags) {
887 struct uv__io_uring_sqe* sqe;
888 struct uv__iou* iou;
889
890 iou = &uv__get_internal_fields(loop)->iou;
891
892 sqe = uv__iou_get_sqe(iou, loop, req);
893 if (sqe == NULL)
894 return 0;
895
896   /* Little known fact: setting sqe->off and sqe->len turns
897 * it into an asynchronous sync_file_range() operation.
898 */
899 sqe->fd = req->file;
900 sqe->fsync_flags = fsync_flags;
901 sqe->opcode = UV__IORING_OP_FSYNC;
902
903 uv__iou_submit(iou);
904
905 return 1;
906 }
907
908
909 int uv__iou_fs_link(uv_loop_t* loop, uv_fs_t* req) {
910 struct uv__io_uring_sqe* sqe;
911 struct uv__iou* iou;
912
913 iou = &uv__get_internal_fields(loop)->iou;
914
915 if (!(iou->flags & UV__MKDIRAT_SYMLINKAT_LINKAT))
916 return 0;
917
918 sqe = uv__iou_get_sqe(iou, loop, req);
919 if (sqe == NULL)
920 return 0;
921
922 sqe->addr = (uintptr_t) req->path;
923 sqe->fd = AT_FDCWD;
924 sqe->addr2 = (uintptr_t) req->new_path;
925 sqe->len = AT_FDCWD;
926 sqe->opcode = UV__IORING_OP_LINKAT;
927
928 uv__iou_submit(iou);
929
930 return 1;
931 }
932
933
934 int uv__iou_fs_mkdir(uv_loop_t* loop, uv_fs_t* req) {
935 struct uv__io_uring_sqe* sqe;
936 struct uv__iou* iou;
937
938 iou = &uv__get_internal_fields(loop)->iou;
939
940 if (!(iou->flags & UV__MKDIRAT_SYMLINKAT_LINKAT))
941 return 0;
942
943 sqe = uv__iou_get_sqe(iou, loop, req);
944 if (sqe == NULL)
945 return 0;
946
947 sqe->addr = (uintptr_t) req->path;
948 sqe->fd = AT_FDCWD;
949 sqe->len = req->mode;
950 sqe->opcode = UV__IORING_OP_MKDIRAT;
951
952 uv__iou_submit(iou);
953
954 return 1;
955 }
956
957
958 int uv__iou_fs_open(uv_loop_t* loop, uv_fs_t* req) {
959 struct uv__io_uring_sqe* sqe;
960 struct uv__iou* iou;
961
962 iou = &uv__get_internal_fields(loop)->iou;
963
964 sqe = uv__iou_get_sqe(iou, loop, req);
965 if (sqe == NULL)
966 return 0;
967
968 sqe->addr = (uintptr_t) req->path;
969 sqe->fd = AT_FDCWD;
970 sqe->len = req->mode;
971 sqe->opcode = UV__IORING_OP_OPENAT;
972 sqe->open_flags = req->flags | O_CLOEXEC;
973
974 uv__iou_submit(iou);
975
976 return 1;
977 }
978
979
980 int uv__iou_fs_rename(uv_loop_t* loop, uv_fs_t* req) {
981 struct uv__io_uring_sqe* sqe;
982 struct uv__iou* iou;
983
984 iou = &uv__get_internal_fields(loop)->iou;
985
986 sqe = uv__iou_get_sqe(iou, loop, req);
987 if (sqe == NULL)
988 return 0;
989
990 sqe->addr = (uintptr_t) req->path;
991 sqe->fd = AT_FDCWD;
992 sqe->addr2 = (uintptr_t) req->new_path;
993 sqe->len = AT_FDCWD;
994 sqe->opcode = UV__IORING_OP_RENAMEAT;
995
996 uv__iou_submit(iou);
997
998 return 1;
999 }
1000
1001
1002 int uv__iou_fs_symlink(uv_loop_t* loop, uv_fs_t* req) {
1003 struct uv__io_uring_sqe* sqe;
1004 struct uv__iou* iou;
1005
1006 iou = &uv__get_internal_fields(loop)->iou;
1007
1008 if (!(iou->flags & UV__MKDIRAT_SYMLINKAT_LINKAT))
1009 return 0;
1010
1011 sqe = uv__iou_get_sqe(iou, loop, req);
1012 if (sqe == NULL)
1013 return 0;
1014
1015 sqe->addr = (uintptr_t) req->path;
1016 sqe->fd = AT_FDCWD;
1017 sqe->addr2 = (uintptr_t) req->new_path;
1018 sqe->opcode = UV__IORING_OP_SYMLINKAT;
1019
1020 uv__iou_submit(iou);
1021
1022 return 1;
1023 }
1024
1025
1026 int uv__iou_fs_unlink(uv_loop_t* loop, uv_fs_t* req) {
1027 struct uv__io_uring_sqe* sqe;
1028 struct uv__iou* iou;
1029
1030 iou = &uv__get_internal_fields(loop)->iou;
1031
1032 sqe = uv__iou_get_sqe(iou, loop, req);
1033 if (sqe == NULL)
1034 return 0;
1035
1036 sqe->addr = (uintptr_t) req->path;
1037 sqe->fd = AT_FDCWD;
1038 sqe->opcode = UV__IORING_OP_UNLINKAT;
1039
1040 uv__iou_submit(iou);
1041
1042 return 1;
1043 }
1044
1045
1046 int uv__iou_fs_read_or_write(uv_loop_t* loop,
1047 uv_fs_t* req,
1048 int is_read) {
1049 struct uv__io_uring_sqe* sqe;
1050 struct uv__iou* iou;
1051
1052   /* If iovcnt is greater than IOV_MAX, cap it to IOV_MAX on reads and fall
1053    * back to the threadpool on writes. */
1054 if (req->nbufs > IOV_MAX) {
1055 if (is_read)
1056 req->nbufs = IOV_MAX;
1057 else
1058 return 0;
1059 }
1060
1061 iou = &uv__get_internal_fields(loop)->iou;
1062
1063 sqe = uv__iou_get_sqe(iou, loop, req);
1064 if (sqe == NULL)
1065 return 0;
1066
1067 sqe->addr = (uintptr_t) req->bufs;
1068 sqe->fd = req->file;
1069 sqe->len = req->nbufs;
1070 sqe->off = req->off < 0 ? -1 : req->off;
1071 sqe->opcode = is_read ? UV__IORING_OP_READV : UV__IORING_OP_WRITEV;
1072
1073 uv__iou_submit(iou);
1074
1075 return 1;
1076 }
1077
1078
1079 int uv__iou_fs_statx(uv_loop_t* loop,
1080 uv_fs_t* req,
1081 int is_fstat,
1082 int is_lstat) {
1083 struct uv__io_uring_sqe* sqe;
1084 struct uv__statx* statxbuf;
1085 struct uv__iou* iou;
1086
1087 statxbuf = uv__malloc(sizeof(*statxbuf));
1088 if (statxbuf == NULL)
1089 return 0;
1090
1091 iou = &uv__get_internal_fields(loop)->iou;
1092
1093 sqe = uv__iou_get_sqe(iou, loop, req);
1094 if (sqe == NULL) {
1095 uv__free(statxbuf);
1096 return 0;
1097 }
1098
1099 req->ptr = statxbuf;
1100
1101 sqe->addr = (uintptr_t) req->path;
1102 sqe->addr2 = (uintptr_t) statxbuf;
1103 sqe->fd = AT_FDCWD;
1104 sqe->len = 0xFFF; /* STATX_BASIC_STATS + STATX_BTIME */
1105 sqe->opcode = UV__IORING_OP_STATX;
1106
1107 if (is_fstat) {
1108 sqe->addr = (uintptr_t) "";
1109 sqe->fd = req->file;
1110 sqe->statx_flags |= 0x1000; /* AT_EMPTY_PATH */
1111 }
1112
1113 if (is_lstat)
1114 sqe->statx_flags |= AT_SYMLINK_NOFOLLOW;
1115
1116 uv__iou_submit(iou);
1117
1118 return 1;
1119 }
1120
1121
1122 void uv__statx_to_stat(const struct uv__statx* statxbuf, uv_stat_t* buf) {
1123 buf->st_dev = makedev(statxbuf->stx_dev_major, statxbuf->stx_dev_minor);
1124 buf->st_mode = statxbuf->stx_mode;
1125 buf->st_nlink = statxbuf->stx_nlink;
1126 buf->st_uid = statxbuf->stx_uid;
1127 buf->st_gid = statxbuf->stx_gid;
1128 buf->st_rdev = makedev(statxbuf->stx_rdev_major, statxbuf->stx_rdev_minor);
1129 buf->st_ino = statxbuf->stx_ino;
1130 buf->st_size = statxbuf->stx_size;
1131 buf->st_blksize = statxbuf->stx_blksize;
1132 buf->st_blocks = statxbuf->stx_blocks;
1133 buf->st_atim.tv_sec = statxbuf->stx_atime.tv_sec;
1134 buf->st_atim.tv_nsec = statxbuf->stx_atime.tv_nsec;
1135 buf->st_mtim.tv_sec = statxbuf->stx_mtime.tv_sec;
1136 buf->st_mtim.tv_nsec = statxbuf->stx_mtime.tv_nsec;
1137 buf->st_ctim.tv_sec = statxbuf->stx_ctime.tv_sec;
1138 buf->st_ctim.tv_nsec = statxbuf->stx_ctime.tv_nsec;
1139 buf->st_birthtim.tv_sec = statxbuf->stx_btime.tv_sec;
1140 buf->st_birthtim.tv_nsec = statxbuf->stx_btime.tv_nsec;
1141 buf->st_flags = 0;
1142 buf->st_gen = 0;
1143 }
1144
1145
1146 static void uv__iou_fs_statx_post(uv_fs_t* req) {
1147 struct uv__statx* statxbuf;
1148 uv_stat_t* buf;
1149
1150 buf = &req->statbuf;
1151 statxbuf = req->ptr;
1152 req->ptr = NULL;
1153
1154 if (req->result == 0) {
1155 uv__msan_unpoison(statxbuf, sizeof(*statxbuf));
1156 uv__statx_to_stat(statxbuf, buf);
1157 req->ptr = buf;
1158 }
1159
1160 uv__free(statxbuf);
1161 }
1162
1163
1164 static void uv__poll_io_uring(uv_loop_t* loop, struct uv__iou* iou) {
1165 struct uv__io_uring_cqe* cqe;
1166 struct uv__io_uring_cqe* e;
1167 uv_fs_t* req;
1168 uint32_t head;
1169 uint32_t tail;
1170 uint32_t mask;
1171 uint32_t i;
1172 uint32_t flags;
1173 int nevents;
1174 int rc;
1175
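  /* Drain completions between cqhead and cqtail; the acquire load on cqtail
   * ensures the CQE contents written before the kernel's tail update are
   * visible. */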
1176 head = *iou->cqhead;
1177 tail = atomic_load_explicit((_Atomic uint32_t*) iou->cqtail,
1178 memory_order_acquire);
1179 mask = iou->cqmask;
1180 cqe = iou->cqe;
1181 nevents = 0;
1182
1183 for (i = head; i != tail; i++) {
1184 e = &cqe[i & mask];
1185
1186 req = (uv_fs_t*) (uintptr_t) e->user_data;
1187 assert(req->type == UV_FS);
1188
1189 uv__req_unregister(loop, req);
1190 iou->in_flight--;
1191
1192 /* If the op is not supported by the kernel retry using the thread pool */
1193 if (e->res == -EOPNOTSUPP) {
1194 uv__fs_post(loop, req);
1195 continue;
1196 }
1197
1198 /* io_uring stores error codes as negative numbers, same as libuv. */
1199 req->result = e->res;
1200
1201 switch (req->fs_type) {
1202 case UV_FS_FSTAT:
1203 case UV_FS_LSTAT:
1204 case UV_FS_STAT:
1205 uv__iou_fs_statx_post(req);
1206 break;
1207 default: /* Squelch -Wswitch warnings. */
1208 break;
1209 }
1210
1211 uv__metrics_update_idle_time(loop);
1212 req->cb(req);
1213 nevents++;
1214 }
1215
1216 atomic_store_explicit((_Atomic uint32_t*) iou->cqhead,
1217 tail,
1218 memory_order_release);
1219
1220   /* Check whether CQEs overflowed; if so, enter the kernel to make them
1221 * available. Don't grab them immediately but in the next loop iteration to
1222 * avoid loop starvation. */
1223 flags = atomic_load_explicit((_Atomic uint32_t*) iou->sqflags,
1224 memory_order_acquire);
1225
1226 if (flags & UV__IORING_SQ_CQ_OVERFLOW) {
1227 do
1228 rc = uv__io_uring_enter(iou->ringfd, 0, 0, UV__IORING_ENTER_GETEVENTS);
1229 while (rc == -1 && errno == EINTR);
1230
1231 if (rc < 0)
1232 perror("libuv: io_uring_enter(getevents)"); /* Can't happen. */
1233 }
1234
1235 uv__metrics_inc_events(loop, nevents);
1236 if (uv__get_internal_fields(loop)->current_timeout == 0)
1237 uv__metrics_inc_events_waiting(loop, nevents);
1238 }
1239
1240
1241 static void uv__epoll_ctl_prep(int epollfd,
1242 struct uv__iou* ctl,
1243 struct epoll_event (*events)[256],
1244 int op,
1245 int fd,
1246 struct epoll_event* e) {
1247 struct uv__io_uring_sqe* sqe;
1248 struct epoll_event* pe;
1249 uint32_t mask;
1250 uint32_t slot;
1251 int ret = 0;
1252
1253 if (ctl->ringfd == -1) {
1254 if (!uv__epoll_ctl(epollfd, op, fd, e))
1255 return;
1256
1257 if (op == EPOLL_CTL_DEL)
1258 return; /* Ignore errors, may be racing with another thread. */
1259
1260 if (op != EPOLL_CTL_ADD) {
1261 #ifdef PRINT_ERRNO_ABORT
1262 UV_ERRNO_ABORT("errno is %d, fd is %d, backend_fd is %d(%s:%s:%d)",
1263 errno, fd, epollfd, __FILE__, __func__, __LINE__);
1264 #else
1265 abort();
1266 #endif
1267 }
1268
1269 if (errno != EEXIST) {
1270 #ifdef PRINT_ERRNO_ABORT
1271 UV_ERRNO_ABORT("errno is %d, fd is %d, backend_fd is %d(%s:%s:%d)",
1272 errno, fd, epollfd, __FILE__, __func__, __LINE__);
1273 #else
1274 abort();
1275 #endif
1276 }
1277
1278 /* File descriptor that's been watched before, update event mask. */
1279 ret = uv__epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, e);
1280 if (!ret)
1281 return;
1282
1283 #ifdef PRINT_ERRNO_ABORT
1284 UV_ERRNO_ABORT("errno is %d, uv__epoll_ctl ret is %d, fd is %d, backend_fd is %d(%s:%s:%d)",
1285 errno, ret, fd, epollfd, __FILE__, __func__, __LINE__);
1286 #else
1287 abort();
1288 #endif
1289 } else {
1290 mask = ctl->sqmask;
1291 slot = (*ctl->sqtail)++ & mask;
1292
1293 pe = &(*events)[slot];
1294 *pe = *e;
1295
1296 sqe = ctl->sqe;
1297 sqe = &sqe[slot];
1298
1299 memset(sqe, 0, sizeof(*sqe));
1300 sqe->addr = (uintptr_t) pe;
1301 sqe->fd = epollfd;
1302 sqe->len = op;
1303 sqe->off = fd;
1304 sqe->opcode = UV__IORING_OP_EPOLL_CTL;
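    /* Pack op (bits 0-1), ring slot (bits 2-9) and fd (bits 32-63) into
     * user_data; uv__epoll_ctl_flush() unpacks them when it handles failed
     * submissions. */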
1305 sqe->user_data = op | slot << 2 | (int64_t) fd << 32;
1306
1307 if ((*ctl->sqhead & mask) == (*ctl->sqtail & mask))
1308 uv__epoll_ctl_flush(epollfd, ctl, events);
1309 }
1310 }
1311
1312
1313 static void uv__epoll_ctl_flush(int epollfd,
1314 struct uv__iou* ctl,
1315 struct epoll_event (*events)[256]) {
1316 struct epoll_event oldevents[256];
1317 struct uv__io_uring_cqe* cqe;
1318 uint32_t oldslot;
1319 uint32_t slot;
1320 uint32_t n;
1321 int fd;
1322 int op;
1323 int rc;
1324
1325 STATIC_ASSERT(sizeof(oldevents) == sizeof(*events));
1326 assert(ctl->ringfd != -1);
1327 assert(*ctl->sqhead != *ctl->sqtail);
1328
1329 n = *ctl->sqtail - *ctl->sqhead;
1330 do
1331 rc = uv__io_uring_enter(ctl->ringfd, n, n, UV__IORING_ENTER_GETEVENTS);
1332 while (rc == -1 && errno == EINTR);
1333
1334 if (rc < 0)
1335 perror("libuv: io_uring_enter(getevents)"); /* Can't happen. */
1336
1337 if (rc != (int) n)
1338 abort();
1339
1340 assert(*ctl->sqhead == *ctl->sqtail);
1341
1342 memcpy(oldevents, *events, sizeof(*events));
1343
1344 /* Failed submissions are either EPOLL_CTL_DEL commands for file descriptors
1345 * that have been closed, or EPOLL_CTL_ADD commands for file descriptors
1346 * that we are already watching. Ignore the former and retry the latter
1347 * with EPOLL_CTL_MOD.
1348 */
1349 while (*ctl->cqhead != *ctl->cqtail) {
1350 slot = (*ctl->cqhead)++ & ctl->cqmask;
1351
1352 cqe = ctl->cqe;
1353 cqe = &cqe[slot];
1354
1355 if (cqe->res == 0)
1356 continue;
1357
1358 fd = cqe->user_data >> 32;
1359 op = 3 & cqe->user_data;
1360 oldslot = 255 & (cqe->user_data >> 2);
1361
1362 if (op == EPOLL_CTL_DEL)
1363 continue;
1364
1365 if (op != EPOLL_CTL_ADD)
1366 abort();
1367
1368 if (cqe->res != -EEXIST)
1369 abort();
1370
1371 uv__epoll_ctl_prep(epollfd,
1372 ctl,
1373 events,
1374 EPOLL_CTL_MOD,
1375 fd,
1376 &oldevents[oldslot]);
1377 }
1378 }
1379
1380
1381 void uv__io_poll(uv_loop_t* loop, int timeout) {
1382 uv__loop_internal_fields_t* lfields;
1383 struct epoll_event events[1024];
1384 struct epoll_event prep[256];
1385 struct uv__invalidate inv;
1386 struct epoll_event* pe;
1387 struct epoll_event e;
1388 struct uv__iou* ctl;
1389 struct uv__iou* iou;
1390 int real_timeout;
1391 struct uv__queue* q;
1392 uv__io_t* w;
1393 sigset_t* sigmask;
1394 sigset_t sigset;
1395 uint64_t base;
1396 int have_iou_events;
1397 int have_signals;
1398 int nevents;
1399 int epollfd;
1400 int count;
1401 int nfds;
1402 int fd;
1403 int op;
1404 int i;
1405 int user_timeout;
1406 int reset_timeout;
1407
1408 lfields = uv__get_internal_fields(loop);
1409 ctl = &lfields->ctl;
1410 iou = &lfields->iou;
1411
1412 sigmask = NULL;
1413 if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
1414 sigemptyset(&sigset);
1415 sigaddset(&sigset, SIGPROF);
1416 sigmask = &sigset;
1417 }
1418
1419 assert(timeout >= -1);
1420 base = loop->time;
1421 count = 48; /* Benchmarks suggest this gives the best throughput. */
1422 real_timeout = timeout;
1423
1424 if (lfields->flags & UV_METRICS_IDLE_TIME) {
1425 reset_timeout = 1;
1426 user_timeout = timeout;
1427 timeout = 0;
1428 } else {
1429 reset_timeout = 0;
1430 user_timeout = 0;
1431 }
1432
1433 epollfd = loop->backend_fd;
1434
1435 memset(&e, 0, sizeof(e));
1436
1437 while (!uv__queue_empty(&loop->watcher_queue)) {
1438 q = uv__queue_head(&loop->watcher_queue);
1439 w = uv__queue_data(q, uv__io_t, watcher_queue);
1440 uv__queue_remove(q);
1441 uv__queue_init(q);
1442
1443 op = EPOLL_CTL_MOD;
1444 if (w->events == 0)
1445 op = EPOLL_CTL_ADD;
1446
1447 w->events = w->pevents;
1448 e.events = w->pevents;
1449 e.data.fd = w->fd;
1450
1451 uv__epoll_ctl_prep(epollfd, ctl, &prep, op, w->fd, &e);
1452 }
1453
1454 inv.events = events;
1455 inv.prep = &prep;
1456 inv.nfds = -1;
1457
1458 for (;;) {
1459 if (loop->nfds == 0)
1460 if (iou->in_flight == 0)
1461 break;
1462
1463 /* All event mask mutations should be visible to the kernel before
1464 * we enter epoll_pwait().
1465 */
1466 if (ctl->ringfd != -1)
1467 while (*ctl->sqhead != *ctl->sqtail)
1468 uv__epoll_ctl_flush(epollfd, ctl, &prep);
1469
1470 /* Only need to set the provider_entry_time if timeout != 0. The function
1471 * will return early if the loop isn't configured with UV_METRICS_IDLE_TIME.
1472 */
1473 if (timeout != 0)
1474 uv__metrics_set_provider_entry_time(loop);
1475
1476 /* Store the current timeout in a location that's globally accessible so
1477 * other locations like uv__work_done() can determine whether the queue
1478 * of events in the callback were waiting when poll was called.
1479 */
1480 lfields->current_timeout = timeout;
1481 #ifdef USE_FFRT
1482 if (ffrt_get_cur_task() == NULL) {
1483 nfds = epoll_pwait(epollfd, events, ARRAY_SIZE(events), timeout, sigmask);
1484 } else {
1485 nfds = uv__epoll_wait(events, ARRAY_SIZE(events), timeout);
1486 }
1487 #else
1488 nfds = epoll_pwait(epollfd, events, ARRAY_SIZE(events), timeout, sigmask);
1489 #endif
1490
1491 /* Update loop->time unconditionally. It's tempting to skip the update when
1492 * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
1493 * operating system didn't reschedule our process while in the syscall.
1494 */
1495 SAVE_ERRNO(uv__update_time(loop));
1496
1497 if (nfds == -1)
1498 assert(errno == EINTR);
1499 else if (nfds == 0)
1500 /* Unlimited timeout should only return with events or signal. */
1501 assert(timeout != -1);
1502
1503 if (nfds == 0 || nfds == -1) {
1504 if (reset_timeout != 0) {
1505 timeout = user_timeout;
1506 reset_timeout = 0;
1507 } else if (nfds == 0) {
1508 return;
1509 }
1510
1511 /* Interrupted by a signal. Update timeout and poll again. */
1512 goto update_timeout;
1513 }
1514
1515 have_iou_events = 0;
1516 have_signals = 0;
1517 nevents = 0;
1518
1519 inv.nfds = nfds;
1520 lfields->inv = &inv;
1521
1522 for (i = 0; i < nfds; i++) {
1523 pe = events + i;
1524 fd = pe->data.fd;
1525
1526 /* Skip invalidated events, see uv__platform_invalidate_fd */
1527 if (fd == -1)
1528 continue;
1529
1530 if (fd == iou->ringfd) {
1531 uv__poll_io_uring(loop, iou);
1532 have_iou_events = 1;
1533 continue;
1534 }
1535
1536 #ifndef USE_OHOS_DFX
1537 assert(fd >= 0);
1538 assert((unsigned) fd < loop->nwatchers);
1539 #else
1540 if (fd < 0 || (unsigned) fd >= loop->nwatchers)
1541 continue;
1542 #endif
1543
1544 w = loop->watchers[fd];
1545
1546 if (w == NULL) {
1547 /* File descriptor that we've stopped watching, disarm it.
1548 *
1549 * Ignore all errors because we may be racing with another thread
1550 * when the file descriptor is closed.
1551 */
1552 uv__epoll_ctl_prep(epollfd, ctl, &prep, EPOLL_CTL_DEL, fd, pe);
1553 continue;
1554 }
1555
1556 /* Give users only events they're interested in. Prevents spurious
1557 * callbacks when previous callback invocation in this loop has stopped
1558        * the current watcher. Also, filters out events that the user has not
1559 * requested us to watch.
1560 */
1561 pe->events &= w->pevents | POLLERR | POLLHUP;
1562
1563 /* Work around an epoll quirk where it sometimes reports just the
1564 * EPOLLERR or EPOLLHUP event. In order to force the event loop to
1565 * move forward, we merge in the read/write events that the watcher
1566 * is interested in; uv__read() and uv__write() will then deal with
1567 * the error or hangup in the usual fashion.
1568 *
1569 * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user
1570 * reads the available data, calls uv_read_stop(), then sometime later
1571 * calls uv_read_start() again. By then, libuv has forgotten about the
1572 * hangup and the kernel won't report EPOLLIN again because there's
1573 * nothing left to read. If anything, libuv is to blame here. The
1574 * current hack is just a quick bandaid; to properly fix it, libuv
1575 * needs to remember the error/hangup event. We should get that for
1576 * free when we switch over to edge-triggered I/O.
1577 */
1578 if (pe->events == POLLERR || pe->events == POLLHUP)
1579 pe->events |=
1580 w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI);
1581
1582 if (pe->events != 0) {
1583 /* Run signal watchers last. This also affects child process watchers
1584 * because those are implemented in terms of signal watchers.
1585 */
1586 if (w == &loop->signal_io_watcher) {
1587 have_signals = 1;
1588 } else {
1589 uv__metrics_update_idle_time(loop);
1590 w->cb(loop, w, pe->events);
1591 }
1592
1593 nevents++;
1594 }
1595 }
1596
1597 uv__metrics_inc_events(loop, nevents);
1598 if (reset_timeout != 0) {
1599 timeout = user_timeout;
1600 reset_timeout = 0;
1601 uv__metrics_inc_events_waiting(loop, nevents);
1602 }
1603
1604 if (have_signals != 0) {
1605 uv__metrics_update_idle_time(loop);
1606 loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);
1607 }
1608
1609 lfields->inv = NULL;
1610
1611 if (have_iou_events != 0)
1612 break; /* Event loop should cycle now so don't poll again. */
1613
1614 if (have_signals != 0)
1615 break; /* Event loop should cycle now so don't poll again. */
1616
1617 if (nevents != 0) {
1618 if (nfds == ARRAY_SIZE(events) && --count != 0) {
1619 /* Poll for more events but don't block this time. */
1620 timeout = 0;
1621 continue;
1622 }
1623 break;
1624 }
1625
1626 update_timeout:
1627 if (timeout == 0)
1628 break;
1629
1630 if (timeout == -1)
1631 continue;
1632
1633 assert(timeout > 0);
1634
1635 real_timeout -= (loop->time - base);
1636 if (real_timeout <= 0)
1637 break;
1638
1639 timeout = real_timeout;
1640 }
1641
1642 if (ctl->ringfd != -1)
1643 while (*ctl->sqhead != *ctl->sqtail)
1644 uv__epoll_ctl_flush(epollfd, ctl, &prep);
1645 }
1646
1647 uint64_t uv__hrtime(uv_clocktype_t type) {
1648 static _Atomic clock_t fast_clock_id = -1;
1649 struct timespec t;
1650 clock_t clock_id;
1651
1652 /* Prefer CLOCK_MONOTONIC_COARSE if available but only when it has
1653 * millisecond granularity or better. CLOCK_MONOTONIC_COARSE is
1654 * serviced entirely from the vDSO, whereas CLOCK_MONOTONIC may
1655 * decide to make a costly system call.
1656 */
1657 /* TODO(bnoordhuis) Use CLOCK_MONOTONIC_COARSE for UV_CLOCK_PRECISE
1658 * when it has microsecond granularity or better (unlikely).
1659 */
1660 clock_id = CLOCK_MONOTONIC;
1661 if (type != UV_CLOCK_FAST)
1662 goto done;
1663
1664 clock_id = atomic_load_explicit(&fast_clock_id, memory_order_relaxed);
1665 if (clock_id != -1)
1666 goto done;
1667
1668 clock_id = CLOCK_MONOTONIC;
1669 if (0 == clock_getres(CLOCK_MONOTONIC_COARSE, &t))
1670 if (t.tv_nsec <= 1 * 1000 * 1000)
1671 clock_id = CLOCK_MONOTONIC_COARSE;
1672
1673 atomic_store_explicit(&fast_clock_id, clock_id, memory_order_relaxed);
1674
1675 done:
1676
1677 if (clock_gettime(clock_id, &t))
1678 return 0; /* Not really possible. */
1679
1680 return t.tv_sec * (uint64_t) 1e9 + t.tv_nsec;
1681 }
1682
1683
1684 int uv_resident_set_memory(size_t* rss) {
1685 char buf[1024];
1686 const char* s;
1687 ssize_t n;
1688 long val;
1689 int fd;
1690 int i;
1691
1692 do
1693 fd = open("/proc/self/stat", O_RDONLY);
1694 while (fd == -1 && errno == EINTR);
1695
1696 if (fd == -1)
1697 return UV__ERR(errno);
1698
1699 do
1700 n = read(fd, buf, sizeof(buf) - 1);
1701 while (n == -1 && errno == EINTR);
1702
1703 uv__close(fd);
1704 if (n == -1)
1705 return UV__ERR(errno);
1706 buf[n] = '\0';
1707
1708 s = strchr(buf, ' ');
1709 if (s == NULL)
1710 goto err;
1711
1712 s += 1;
1713 if (*s != '(')
1714 goto err;
1715
1716 s = strchr(s, ')');
1717 if (s == NULL)
1718 goto err;
1719
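  /* s points at the ')' closing the comm field; skip 22 more fields to reach
   * rss, the 24th field of /proc/self/stat. */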
1720 for (i = 1; i <= 22; i++) {
1721 s = strchr(s + 1, ' ');
1722 if (s == NULL)
1723 goto err;
1724 }
1725
1726 errno = 0;
1727 val = strtol(s, NULL, 10);
1728 if (errno != 0)
1729 goto err;
1730 if (val < 0)
1731 goto err;
1732
1733 *rss = val * getpagesize();
1734 return 0;
1735
1736 err:
1737 return UV_EINVAL;
1738 }
1739
1740 int uv_uptime(double* uptime) {
1741 struct timespec now;
1742 char buf[128];
1743
1744 /* Consult /proc/uptime when present (common case), or fall back to
1745 * clock_gettime. Why not always clock_gettime? It doesn't always return the
1746 * right result under OpenVZ and possibly other containerized environments.
1747 */
1748 if (0 == uv__slurp("/proc/uptime", buf, sizeof(buf)))
1749 if (1 == sscanf(buf, "%lf", uptime))
1750 return 0;
1751
1752 if (clock_gettime(CLOCK_BOOTTIME, &now))
1753 return UV__ERR(errno);
1754
1755 *uptime = now.tv_sec;
1756 return 0;
1757 }
1758
1759
1760 int uv_cpu_info(uv_cpu_info_t** ci, int* count) {
1761 #if defined(__PPC__)
1762 static const char model_marker[] = "cpu\t\t: ";
1763 #elif defined(__arm__)
1764 static const char model_marker[] = "Processor\t: ";
1765 #elif defined(__aarch64__)
1766 static const char model_marker[] = "CPU part\t: ";
1767 #elif defined(__mips__)
1768 static const char model_marker[] = "cpu model\t\t: ";
1769 #elif defined(__loongarch__)
1770 static const char model_marker[] = "cpu family\t\t: ";
1771 #else
1772 static const char model_marker[] = "model name\t: ";
1773 #endif
1774 static const char parts[] =
1775 #ifdef __aarch64__
1776 "0x811\nARM810\n" "0x920\nARM920\n" "0x922\nARM922\n"
1777 "0x926\nARM926\n" "0x940\nARM940\n" "0x946\nARM946\n"
1778 "0x966\nARM966\n" "0xa20\nARM1020\n" "0xa22\nARM1022\n"
1779 "0xa26\nARM1026\n" "0xb02\nARM11 MPCore\n" "0xb36\nARM1136\n"
1780 "0xb56\nARM1156\n" "0xb76\nARM1176\n" "0xc05\nCortex-A5\n"
1781 "0xc07\nCortex-A7\n" "0xc08\nCortex-A8\n" "0xc09\nCortex-A9\n"
1782 "0xc0d\nCortex-A17\n" /* Originally A12 */
1783 "0xc0f\nCortex-A15\n" "0xc0e\nCortex-A17\n" "0xc14\nCortex-R4\n"
1784 "0xc15\nCortex-R5\n" "0xc17\nCortex-R7\n" "0xc18\nCortex-R8\n"
1785 "0xc20\nCortex-M0\n" "0xc21\nCortex-M1\n" "0xc23\nCortex-M3\n"
1786 "0xc24\nCortex-M4\n" "0xc27\nCortex-M7\n" "0xc60\nCortex-M0+\n"
1787 "0xd01\nCortex-A32\n" "0xd03\nCortex-A53\n" "0xd04\nCortex-A35\n"
1788 "0xd05\nCortex-A55\n" "0xd06\nCortex-A65\n" "0xd07\nCortex-A57\n"
1789 "0xd08\nCortex-A72\n" "0xd09\nCortex-A73\n" "0xd0a\nCortex-A75\n"
1790 "0xd0b\nCortex-A76\n" "0xd0c\nNeoverse-N1\n" "0xd0d\nCortex-A77\n"
1791 "0xd0e\nCortex-A76AE\n" "0xd13\nCortex-R52\n" "0xd20\nCortex-M23\n"
1792 "0xd21\nCortex-M33\n" "0xd41\nCortex-A78\n" "0xd42\nCortex-A78AE\n"
1793 "0xd4a\nNeoverse-E1\n" "0xd4b\nCortex-A78C\n"
1794 #endif
1795 "";
1796 struct cpu {
1797 unsigned long long freq, user, nice, sys, idle, irq;
1798 unsigned model;
1799 };
1800 FILE* fp;
1801 char* p;
1802 int found;
1803 int n;
1804 unsigned i;
1805 unsigned cpu;
1806 unsigned maxcpu;
1807 unsigned size;
1808 unsigned long long skip;
1809 struct cpu (*cpus)[8192]; /* Kernel maximum. */
1810 struct cpu* c;
1811 struct cpu t;
1812 char (*model)[64];
1813 unsigned char bitmap[ARRAY_SIZE(*cpus) / 8];
1814 /* Assumption: even big.LITTLE systems will have only a handful
1815 * of different CPU models. Most systems will just have one.
1816 */
1817 char models[8][64];
1818 char buf[1024];
1819
1820 memset(bitmap, 0, sizeof(bitmap));
1821 memset(models, 0, sizeof(models));
1822 snprintf(*models, sizeof(*models), "unknown");
1823 maxcpu = 0;
1824
1825 cpus = uv__calloc(ARRAY_SIZE(*cpus), sizeof(**cpus));
1826 if (cpus == NULL)
1827 return UV_ENOMEM;
1828
1829 fp = uv__open_file("/proc/stat");
1830 if (fp == NULL) {
1831 uv__free(cpus);
1832 return UV__ERR(errno);
1833 }
1834
1835 if (NULL == fgets(buf, sizeof(buf), fp))
1836 abort();
1837
1838 for (;;) {
1839 memset(&t, 0, sizeof(t));
1840
1841 n = fscanf(fp, "cpu%u %llu %llu %llu %llu %llu %llu",
1842 &cpu, &t.user, &t.nice, &t.sys, &t.idle, &skip, &t.irq);
1843
1844 if (n != 7)
1845 break;
1846
1847 if (NULL == fgets(buf, sizeof(buf), fp))
1848 abort();
1849
1850 if (cpu >= ARRAY_SIZE(*cpus))
1851 continue;
1852
1853 (*cpus)[cpu] = t;
1854
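    /* One bit per CPU: remember which CPU numbers appeared in /proc/stat. */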
1855 bitmap[cpu >> 3] |= 1 << (cpu & 7);
1856
1857 if (cpu >= maxcpu)
1858 maxcpu = cpu + 1;
1859 }
1860
1861 fclose(fp);
1862
1863 fp = uv__open_file("/proc/cpuinfo");
1864 if (fp == NULL)
1865 goto nocpuinfo;
1866
1867 for (;;) {
1868 if (1 != fscanf(fp, "processor\t: %u\n", &cpu))
1869 break; /* Parse error. */
1870
1871 found = 0;
1872 while (!found && fgets(buf, sizeof(buf), fp))
1873 found = !strncmp(buf, model_marker, sizeof(model_marker) - 1);
1874
1875 if (!found)
1876 goto next;
1877
1878 p = buf + sizeof(model_marker) - 1;
1879 n = (int) strcspn(p, "\n");
1880
1881 /* arm64: translate CPU part code to model name. */
1882 if (*parts) {
1883 p = memmem(parts, sizeof(parts) - 1, p, n + 1);
1884 if (p == NULL)
1885 p = "unknown";
1886 else
1887 p += n + 1;
1888 n = (int) strcspn(p, "\n");
1889 }
1890
1891 found = 0;
1892 for (model = models; !found && model < ARRAY_END(models); model++)
1893 found = !strncmp(p, *model, strlen(*model));
1894
1895 if (!found)
1896 goto next;
1897
1898 if (**model == '\0')
1899 snprintf(*model, sizeof(*model), "%.*s", n, p);
1900
1901 if (cpu < maxcpu)
1902 (*cpus)[cpu].model = model - models;
1903
1904 next:
1905 while (fgets(buf, sizeof(buf), fp))
1906 if (*buf == '\n')
1907 break;
1908 }
1909
1910 fclose(fp);
1911 fp = NULL;
1912
1913 nocpuinfo:
1914
1915 n = 0;
1916 for (cpu = 0; cpu < maxcpu; cpu++) {
1917 if (!(bitmap[cpu >> 3] & (1 << (cpu & 7))))
1918 continue;
1919
1920 n++;
1921 snprintf(buf, sizeof(buf),
1922 "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq", cpu);
1923
1924 fp = uv__open_file(buf);
1925 if (fp == NULL)
1926 continue;
1927
1928 if (1 != fscanf(fp, "%llu", &(*cpus)[cpu].freq))
1929 abort();
1930 fclose(fp);
1931 fp = NULL;
1932 }
1933
1934 size = n * sizeof(**ci) + sizeof(models);
1935 *ci = uv__malloc(size);
1936 *count = 0;
1937
1938 if (*ci == NULL) {
1939 uv__free(cpus);
1940 return UV_ENOMEM;
1941 }
1942
1943 *count = n;
1944 p = memcpy(*ci + n, models, sizeof(models));
1945
1946 i = 0;
1947 for (cpu = 0; cpu < maxcpu; cpu++) {
1948 if (!(bitmap[cpu >> 3] & (1 << (cpu & 7))))
1949 continue;
1950
1951 c = *cpus + cpu;
1952
1953 (*ci)[i++] = (uv_cpu_info_t) {
1954 .model = p + c->model * sizeof(*model),
1955 .speed = c->freq / 1000,
1956 /* Note: sysconf(_SC_CLK_TCK) is fixed at 100 Hz,
1957 * therefore the multiplier is always 1000/100 = 10.
1958 */
1959 .cpu_times = (struct uv_cpu_times_s) {
1960 .user = 10 * c->user,
1961 .nice = 10 * c->nice,
1962 .sys = 10 * c->sys,
1963 .idle = 10 * c->idle,
1964 .irq = 10 * c->irq,
1965 },
1966 };
1967 }
1968
1969 uv__free(cpus);
1970
1971 return 0;
1972 }
1973
1974
1975 static int uv__ifaddr_exclude(struct ifaddrs *ent, int exclude_type) {
1976 if (!((ent->ifa_flags & IFF_UP) && (ent->ifa_flags & IFF_RUNNING)))
1977 return 1;
1978 if (ent->ifa_addr == NULL)
1979 return 1;
1980 /*
1981 * On Linux getifaddrs returns information related to the raw underlying
1982 * devices. We're not interested in this information yet.
1983 */
1984 if (ent->ifa_addr->sa_family == PF_PACKET)
1985 return exclude_type;
1986 return !exclude_type;
1987 }
1988
1989 int uv_interface_addresses(uv_interface_address_t** addresses, int* count) {
1990 struct ifaddrs *addrs, *ent;
1991 uv_interface_address_t* address;
1992 int i;
1993 struct sockaddr_ll *sll;
1994
1995 *count = 0;
1996 *addresses = NULL;
1997
1998 if (getifaddrs(&addrs))
1999 return UV__ERR(errno);
2000
2001 /* Count the number of interfaces */
2002 for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
2003 if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFADDR))
2004 continue;
2005
2006 (*count)++;
2007 }
2008
2009 if (*count == 0) {
2010 freeifaddrs(addrs);
2011 return 0;
2012 }
2013
2014 /* Make sure the memory is initialized to zero using calloc() */
2015 *addresses = uv__calloc(*count, sizeof(**addresses));
2016 if (!(*addresses)) {
2017 freeifaddrs(addrs);
2018 return UV_ENOMEM;
2019 }
2020
2021 address = *addresses;
2022
2023 for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
2024 if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFADDR))
2025 continue;
2026
2027 address->name = uv__strdup(ent->ifa_name);
2028
2029 if (ent->ifa_addr->sa_family == AF_INET6) {
2030 address->address.address6 = *((struct sockaddr_in6*) ent->ifa_addr);
2031 } else {
2032 address->address.address4 = *((struct sockaddr_in*) ent->ifa_addr);
2033 }
2034
2035 if (ent->ifa_netmask->sa_family == AF_INET6) {
2036 address->netmask.netmask6 = *((struct sockaddr_in6*) ent->ifa_netmask);
2037 } else {
2038 address->netmask.netmask4 = *((struct sockaddr_in*) ent->ifa_netmask);
2039 }
2040
2041 address->is_internal = !!(ent->ifa_flags & IFF_LOOPBACK);
2042
2043 address++;
2044 }
2045
2046 /* Fill in physical addresses for each interface */
2047 for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
2048 if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFPHYS))
2049 continue;
2050
2051 address = *addresses;
2052
2053 for (i = 0; i < (*count); i++) {
2054 size_t namelen = strlen(ent->ifa_name);
2055 /* Alias interfaces share the same physical address */
2056 if (strncmp(address->name, ent->ifa_name, namelen) == 0 &&
2057 (address->name[namelen] == 0 || address->name[namelen] == ':')) {
2058 sll = (struct sockaddr_ll*)ent->ifa_addr;
2059 memcpy(address->phys_addr, sll->sll_addr, sizeof(address->phys_addr));
2060 }
2061 address++;
2062 }
2063 }
2064
2065 freeifaddrs(addrs);
2066
2067 return 0;
2068 }
2069
2070
2071 void uv_free_interface_addresses(uv_interface_address_t* addresses,
2072 int count) {
2073 int i;
2074
2075 for (i = 0; i < count; i++) {
2076 uv__free(addresses[i].name);
2077 }
2078
2079 uv__free(addresses);
2080 }
2081
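#if 0
/* Minimal usage sketch (illustrative only, never compiled): enumerate
 * the interfaces reported by uv_interface_addresses() and release the
 * list afterwards. The example_* name is a placeholder, not libuv API.
 */
static void example_dump_interfaces(void) {
  uv_interface_address_t* info;
  char ip[64];
  int count;
  int i;

  if (uv_interface_addresses(&info, &count))
    return;

  for (i = 0; i < count; i++) {
    /* The address union overlays sockaddr_in and sockaddr_in6. */
    if (info[i].address.address4.sin_family == AF_INET)
      uv_ip4_name(&info[i].address.address4, ip, sizeof(ip));
    else
      uv_ip6_name(&info[i].address.address6, ip, sizeof(ip));

    printf("%s%s %s\n",
           info[i].name,
           info[i].is_internal ? " (internal)" : "",
           ip);
  }

  uv_free_interface_addresses(info, count);
}
#endif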
2082
2083 void uv__set_process_title(const char* title) {
2084 #if defined(PR_SET_NAME)
2085 prctl(PR_SET_NAME, title); /* Only copies first 16 characters. */
2086 #endif
2087 }
2088
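/* For reference: PR_SET_NAME writes into the kernel's 16-byte comm
 * buffer (TASK_COMM_LEN, including the terminating NUL), so at most 15
 * characters of the title survive. An illustrative title of
 * "node-worker-pool-1" would show up as "node-worker-poo" in
 * /proc/<pid>/comm.
 */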
2089
2090 static uint64_t uv__read_proc_meminfo(const char* what) {
2091 uint64_t rc;
2092 char* p;
2093 char buf[4096]; /* Large enough to hold all of /proc/meminfo. */
2094
2095 if (uv__slurp("/proc/meminfo", buf, sizeof(buf)))
2096 return 0;
2097
2098 p = strstr(buf, what);
2099
2100 if (p == NULL)
2101 return 0;
2102
2103 p += strlen(what);
2104
2105 rc = 0;
2106 sscanf(p, "%" PRIu64 " kB", &rc);
2107
2108 return rc * 1024;
2109 }
2110
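/* Example (illustrative /proc/meminfo contents): given
 *
 *   MemTotal:       16323740 kB
 *   MemAvailable:    9456120 kB
 *
 * uv__read_proc_meminfo("MemAvailable:") scans the number that follows
 * the tag and returns 9456120 * 1024 bytes; a missing tag or a failed
 * read returns 0 so callers can fall back to sysinfo().
 */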
2111
2112 uint64_t uv_get_free_memory(void) {
2113 struct sysinfo info;
2114 uint64_t rc;
2115
2116 rc = uv__read_proc_meminfo("MemAvailable:");
2117
2118 if (rc != 0)
2119 return rc;
2120
2121 if (0 == sysinfo(&info))
2122 return (uint64_t) info.freeram * info.mem_unit;
2123
2124 return 0;
2125 }
2126
2127
2128 uint64_t uv_get_total_memory(void) {
2129 struct sysinfo info;
2130 uint64_t rc;
2131
2132 rc = uv__read_proc_meminfo("MemTotal:");
2133
2134 if (rc != 0)
2135 return rc;
2136
2137 if (0 == sysinfo(&info))
2138 return (uint64_t) info.totalram * info.mem_unit;
2139
2140 return 0;
2141 }
2142
2143
2144 static uint64_t uv__read_uint64(const char* filename) {
2145 char buf[32]; /* Large enough to hold an encoded uint64_t. */
2146 uint64_t rc;
2147
2148 rc = 0;
2149 if (0 == uv__slurp(filename, buf, sizeof(buf)))
2150 if (1 != sscanf(buf, "%" PRIu64, &rc))
2151 if (0 == strcmp(buf, "max\n"))
2152 rc = UINT64_MAX;
2153
2154 return rc;
2155 }
2156
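/* Examples (illustrative): a file containing "4294967296\n" yields
 * 4294967296, a cgroup2 limit file containing "max\n" yields
 * UINT64_MAX, and an unreadable or unparsable file yields 0.
 */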
2157
2158 /* Given a buffer with the contents of a cgroup1 /proc/self/cgroup,
2159 * finds the location and length of the memory controller mount path.
2160 * This disregards the leading / for easy concatenation of paths.
2161 * Returns NULL if the memory controller wasn't found. */
2162 static char* uv__cgroup1_find_memory_controller(char buf[static 1024],
2163 int* n) {
2164 char* p;
2165
2166 /* Seek to the memory controller line. */
2167 p = strchr(buf, ':');
2168 while (p != NULL && strncmp(p, ":memory:", 8)) {
2169 p = strchr(p, '\n');
2170 if (p != NULL)
2171 p = strchr(p, ':');
2172 }
2173
2174 if (p != NULL) {
2175 /* Determine the length of the mount path. */
2176 p = p + strlen(":memory:/");
2177 *n = (int) strcspn(p, "\n");
2178 }
2179
2180 return p;
2181 }
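/* Example (illustrative cgroup1 contents of /proc/self/cgroup):
 *
 *   12:cpu,cpuacct:/user.slice
 *   11:memory:/user.slice/user-1000.slice
 *
 * For the ":memory:" line the helper returns a pointer to
 * "user.slice/user-1000.slice" (leading '/' skipped) and sets *n to its
 * length, ready to be spliced into /sys/fs/cgroup/memory/... paths.
 */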
2182
2183 static void uv__get_cgroup1_memory_limits(char buf[static 1024], uint64_t* high,
2184 uint64_t* max) {
2185 char filename[4097];
2186 char* p;
2187 int n;
2188 uint64_t cgroup1_max;
2189
2190 /* Find out where the controller is mounted. */
2191 p = uv__cgroup1_find_memory_controller(buf, &n);
2192 if (p != NULL) {
2193 snprintf(filename, sizeof(filename),
2194 "/sys/fs/cgroup/memory/%.*s/memory.soft_limit_in_bytes", n, p);
2195 *high = uv__read_uint64(filename);
2196
2197 snprintf(filename, sizeof(filename),
2198 "/sys/fs/cgroup/memory/%.*s/memory.limit_in_bytes", n, p);
2199 *max = uv__read_uint64(filename);
2200
2201 /* If the controller wasn't mounted, the reads above will have failed,
2202 * as indicated by uv__read_uint64 returning 0.
2203 */
2204 if (*high != 0 && *max != 0)
2205 goto update_limits;
2206 }
2207
2208 /* Fall back to the limits of the global memory controller. */
2209 *high = uv__read_uint64("/sys/fs/cgroup/memory/memory.soft_limit_in_bytes");
2210 *max = uv__read_uint64("/sys/fs/cgroup/memory/memory.limit_in_bytes");
2211
2212 /* uv__read_uint64 detects cgroup2's "max", so we need to separately detect
2213 * cgroup1's maximum value (which is derived from LONG_MAX and PAGE_SIZE).
2214 */
2215 update_limits:
2216 cgroup1_max = LONG_MAX & ~(sysconf(_SC_PAGESIZE) - 1);
2217 if (*high == cgroup1_max)
2218 *high = UINT64_MAX;
2219 if (*max == cgroup1_max)
2220 *max = UINT64_MAX;
2221 }
2222
2223 static void uv__get_cgroup2_memory_limits(char buf[static 1024], uint64_t* high,
2224 uint64_t* max) {
2225 char filename[4097];
2226 char* p;
2227 int n;
2228
2229 /* Find out where the controller is mounted. */
2230 p = buf + strlen("0::/");
2231 n = (int) strcspn(p, "\n");
2232
2233 /* Read the memory limits of the controller. */
2234 snprintf(filename, sizeof(filename), "/sys/fs/cgroup/%.*s/memory.max", n, p);
2235 *max = uv__read_uint64(filename);
2236 snprintf(filename, sizeof(filename), "/sys/fs/cgroup/%.*s/memory.high", n, p);
2237 *high = uv__read_uint64(filename);
2238 }
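/* Example (illustrative cgroup2 contents of /proc/self/cgroup):
 *
 *   0::/user.slice/user-1000.slice/session-4.scope
 *
 * results in reads from
 *   /sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.max
 *   /sys/fs/cgroup/user.slice/user-1000.slice/session-4.scope/memory.high
 */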
2239
2240 static uint64_t uv__get_cgroup_constrained_memory(char buf[static 1024]) {
2241 uint64_t high;
2242 uint64_t max;
2243
2244 /* In the case of cgroupv2, we'll only have a single entry. */
2245 if (strncmp(buf, "0::/", 4))
2246 uv__get_cgroup1_memory_limits(buf, &high, &max);
2247 else
2248 uv__get_cgroup2_memory_limits(buf, &high, &max);
2249
2250 if (high == 0 || max == 0)
2251 return 0;
2252
2253 return high < max ? high : max;
2254 }
2255
2256 uint64_t uv_get_constrained_memory(void) {
2257 char buf[1024];
2258
2259 if (uv__slurp("/proc/self/cgroup", buf, sizeof(buf)))
2260 return 0;
2261
2262 return uv__get_cgroup_constrained_memory(buf);
2263 }
2264
2265
2266 static uint64_t uv__get_cgroup1_current_memory(char buf[static 1024]) {
2267 char filename[4097];
2268 uint64_t current;
2269 char* p;
2270 int n;
2271
2272 /* Find out where the controller is mounted. */
2273 p = uv__cgroup1_find_memory_controller(buf, &n);
2274 if (p != NULL) {
2275 snprintf(filename, sizeof(filename),
2276 "/sys/fs/cgroup/memory/%.*s/memory.usage_in_bytes", n, p);
2277 current = uv__read_uint64(filename);
2278
2279 /* If the controller wasn't mounted, the reads above will have failed,
2280 * as indicated by uv__read_uint64 returning 0.
2281 */
2282 if (current != 0)
2283 return current;
2284 }
2285
2286 /* Fall back to the usage of the global memory controller. */
2287 return uv__read_uint64("/sys/fs/cgroup/memory/memory.usage_in_bytes");
2288 }
2289
2290 static uint64_t uv__get_cgroup2_current_memory(char buf[static 1024]) {
2291 char filename[4097];
2292 char* p;
2293 int n;
2294
2295 /* Find out where the controller is mounted. */
2296 p = buf + strlen("0::/");
2297 n = (int) strcspn(p, "\n");
2298
2299 snprintf(filename, sizeof(filename),
2300 "/sys/fs/cgroup/%.*s/memory.current", n, p);
2301 return uv__read_uint64(filename);
2302 }
2303
2304 uint64_t uv_get_available_memory(void) {
2305 char buf[1024];
2306 uint64_t constrained;
2307 uint64_t current;
2308 uint64_t total;
2309
2310 if (uv__slurp("/proc/self/cgroup", buf, sizeof(buf)))
2311 return 0;
2312
2313 constrained = uv__get_cgroup_constrained_memory(buf);
2314 if (constrained == 0)
2315 return uv_get_free_memory();
2316
2317 total = uv_get_total_memory();
2318 if (constrained > total)
2319 return uv_get_free_memory();
2320
2321 /* In the case of cgroupv2, we'll only have a single entry. */
2322 if (strncmp(buf, "0::/", 4))
2323 current = uv__get_cgroup1_current_memory(buf);
2324 else
2325 current = uv__get_cgroup2_current_memory(buf);
2326
2327 /* memory usage can be higher than the limit (for short bursts of time) */
2328 if (constrained < current)
2329 return 0;
2330
2331 return constrained - current;
2332 }
2333
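/* Worked example (illustrative numbers): with memory.high reading
 * "max" (UINT64_MAX), memory.max = 2 GiB and memory.current = 1.5 GiB
 * on a 16 GiB machine, the constrained limit is 2 GiB and this
 * function returns 2 GiB - 1.5 GiB = 512 MiB. Without a usable cgroup
 * limit it falls back to uv_get_free_memory().
 */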
2334
2335 void uv_loadavg(double avg[3]) {
2336 struct sysinfo info;
2337 char buf[128]; /* Large enough to hold all of /proc/loadavg. */
2338
2339 if (0 == uv__slurp("/proc/loadavg", buf, sizeof(buf)))
2340 if (3 == sscanf(buf, "%lf %lf %lf", &avg[0], &avg[1], &avg[2]))
2341 return;
2342
2343 if (sysinfo(&info) < 0)
2344 return;
2345
2346 avg[0] = (double) info.loads[0] / 65536.0;
2347 avg[1] = (double) info.loads[1] / 65536.0;
2348 avg[2] = (double) info.loads[2] / 65536.0;
2349 }
2350
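/* Example (illustrative): a /proc/loadavg of
 *
 *   0.74 0.52 0.46 2/1149 31113
 *
 * fills avg with {0.74, 0.52, 0.46}; the remaining fields are ignored.
 * The sysinfo() fallback divides the fixed-point loads[] values by
 * 65536 to recover the same three averages.
 */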
2351
2352 static int compare_watchers(const struct watcher_list* a,
2353 const struct watcher_list* b) {
2354 if (a->wd < b->wd) return -1;
2355 if (a->wd > b->wd) return 1;
2356 return 0;
2357 }
2358
2359
2360 static int init_inotify(uv_loop_t* loop) {
2361 int fd;
2362
2363 if (loop->inotify_fd != -1)
2364 return 0;
2365
2366 fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
2367 if (fd < 0)
2368 return UV__ERR(errno);
2369
2370 loop->inotify_fd = fd;
2371 uv__io_init(&loop->inotify_read_watcher, uv__inotify_read, loop->inotify_fd);
2372 uv__io_start(loop, &loop->inotify_read_watcher, POLLIN);
2373
2374 return 0;
2375 }
2376
2377
2378 static int uv__inotify_fork(uv_loop_t* loop, struct watcher_list* root) {
2379 /* Open the inotify_fd, and re-arm all the inotify watchers. */
2380 int err;
2381 struct watcher_list* tmp_watcher_list_iter;
2382 struct watcher_list* watcher_list;
2383 struct watcher_list tmp_watcher_list;
2384 struct uv__queue queue;
2385 struct uv__queue* q;
2386 uv_fs_event_t* handle;
2387 char* tmp_path;
2388
2389 if (root == NULL)
2390 return 0;
2391
2392 /* We must restore the old watcher list to be able to close items
2393 * out of it.
2394 */
2395 loop->inotify_watchers = root;
2396
2397 uv__queue_init(&tmp_watcher_list.watchers);
2398 /* Note that this queue is shared with uv_fs_event_start() and
2399 * uv_fs_event_stop(), which makes uv__queue_foreach unsafe to use. We
2400 * therefore use the uv__queue_move trick to iterate safely, and we don't
2401 * free the watcher list until we're done iterating; cf. uv__inotify_read.
2402 */
2403 RB_FOREACH_SAFE(watcher_list, watcher_root,
2404 uv__inotify_watchers(loop), tmp_watcher_list_iter) {
2405 watcher_list->iterating = 1;
2406 uv__queue_move(&watcher_list->watchers, &queue);
2407 while (!uv__queue_empty(&queue)) {
2408 q = uv__queue_head(&queue);
2409 handle = uv__queue_data(q, uv_fs_event_t, watchers);
2410 /* It's critical to keep a copy of path here, because it
2411 * will be set to NULL by stop() and then deallocated by
2412 * maybe_free_watcher_list
2413 */
2414 tmp_path = uv__strdup(handle->path);
2415 assert(tmp_path != NULL);
2416 uv__queue_remove(q);
2417 uv__queue_insert_tail(&watcher_list->watchers, q);
2418 uv_fs_event_stop(handle);
2419
2420 uv__queue_insert_tail(&tmp_watcher_list.watchers, &handle->watchers);
2421 handle->path = tmp_path;
2422 }
2423 watcher_list->iterating = 0;
2424 maybe_free_watcher_list(watcher_list, loop);
2425 }
2426
2427 uv__queue_move(&tmp_watcher_list.watchers, &queue);
2428 while (!uv__queue_empty(&queue)) {
2429 q = uv__queue_head(&queue);
2430 uv__queue_remove(q);
2431 handle = uv__queue_data(q, uv_fs_event_t, watchers);
2432 tmp_path = handle->path;
2433 handle->path = NULL;
2434 err = uv_fs_event_start(handle, handle->cb, tmp_path, 0);
2435 uv__free(tmp_path);
2436 if (err)
2437 return err;
2438 }
2439
2440 return 0;
2441 }
2442
2443
2444 static struct watcher_list* find_watcher(uv_loop_t* loop, int wd) {
2445 struct watcher_list w;
2446 w.wd = wd;
2447 return RB_FIND(watcher_root, uv__inotify_watchers(loop), &w);
2448 }
2449
2450
2451 static void maybe_free_watcher_list(struct watcher_list* w, uv_loop_t* loop) {
2452 /* If watcher_list->watchers is being iterated over, we can't free it. */
2453 if ((!w->iterating) && uv__queue_empty(&w->watchers)) {
2454 /* No watchers left for this path. Clean up. */
2455 RB_REMOVE(watcher_root, uv__inotify_watchers(loop), w);
2456 inotify_rm_watch(loop->inotify_fd, w->wd);
2457 uv__free(w);
2458 }
2459 }
2460
2461
2462 static void uv__inotify_read(uv_loop_t* loop,
2463 uv__io_t* dummy,
2464 unsigned int events) {
2465 const struct inotify_event* e;
2466 struct watcher_list* w;
2467 uv_fs_event_t* h;
2468 struct uv__queue queue;
2469 struct uv__queue* q;
2470 const char* path;
2471 ssize_t size;
2472 const char *p;
2473 /* needs to be large enough for sizeof(inotify_event) + strlen(path) */
2474 char buf[4096];
2475
2476 for (;;) {
2477 do
2478 size = read(loop->inotify_fd, buf, sizeof(buf));
2479 while (size == -1 && errno == EINTR);
2480
2481 if (size == -1) {
2482 assert(errno == EAGAIN || errno == EWOULDBLOCK);
2483 break;
2484 }
2485
2486 assert(size > 0); /* pre-2.6.21 thing, size=0 == read buffer too small */
2487
2488 /* Now we have one or more inotify_event structs. */
2489 for (p = buf; p < buf + size; p += sizeof(*e) + e->len) {
2490 e = (const struct inotify_event*) p;
2491
2492 events = 0;
2493 if (e->mask & (IN_ATTRIB|IN_MODIFY))
2494 events |= UV_CHANGE;
2495 if (e->mask & ~(IN_ATTRIB|IN_MODIFY))
2496 events |= UV_RENAME;
2497
2498 w = find_watcher(loop, e->wd);
2499 if (w == NULL)
2500 continue; /* Stale event, no watchers left. */
2501
2502 /* inotify does not return the filename when monitoring a single file
2503 * for modifications. Repurpose the filename for API compatibility.
2504 * I'm not convinced this is a good thing, maybe it should go.
2505 */
2506 path = e->len ? (const char*) (e + 1) : uv__basename_r(w->path);
2507
2508 /* We're about to iterate over the queue and call the user's callbacks.
2509 * What can go wrong?
2510 * A callback could call uv_fs_event_stop()
2511 * and the queue can change under our feet.
2512 * So, we use the uv__queue_move() trick to safely iterate over the queue.
2513 * And we don't free the watcher_list until we're done iterating.
2514 *
2515 * First,
2516 * tell uv_fs_event_stop() (which could be called from a user's callback)
2517 * not to free watcher_list.
2518 */
2519 w->iterating = 1;
2520 uv__queue_move(&w->watchers, &queue);
2521 while (!uv__queue_empty(&queue)) {
2522 q = uv__queue_head(&queue);
2523 h = uv__queue_data(q, uv_fs_event_t, watchers);
2524
2525 uv__queue_remove(q);
2526 uv__queue_insert_tail(&w->watchers, q);
2527
2528 h->cb(h, path, events, 0);
2529 }
2530 /* done iterating, time to (maybe) free empty watcher_list */
2531 w->iterating = 0;
2532 maybe_free_watcher_list(w, loop);
2533 }
2534 }
2535 }
2536
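/* Examples of the mask mapping above (illustrative): IN_MODIFY and
 * IN_ATTRIB map to UV_CHANGE; IN_CREATE, IN_DELETE, IN_MOVED_FROM and
 * IN_MOVED_TO map to UV_RENAME; a mask of IN_ATTRIB|IN_MOVE_SELF
 * reports both UV_CHANGE and UV_RENAME in a single callback.
 */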
2537
2538 int uv_fs_event_init(uv_loop_t* loop, uv_fs_event_t* handle) {
2539 uv__handle_init(loop, (uv_handle_t*)handle, UV_FS_EVENT);
2540 return 0;
2541 }
2542
2543
2544 int uv_fs_event_start(uv_fs_event_t* handle,
2545 uv_fs_event_cb cb,
2546 const char* path,
2547 unsigned int flags) {
2548 struct watcher_list* w;
2549 uv_loop_t* loop;
2550 size_t len;
2551 int events;
2552 int err;
2553 int wd;
2554
2555 if (uv__is_active(handle))
2556 return UV_EINVAL;
2557
2558 loop = handle->loop;
2559
2560 err = init_inotify(loop);
2561 if (err)
2562 return err;
2563
2564 events = IN_ATTRIB
2565 | IN_CREATE
2566 | IN_MODIFY
2567 | IN_DELETE
2568 | IN_DELETE_SELF
2569 | IN_MOVE_SELF
2570 | IN_MOVED_FROM
2571 | IN_MOVED_TO;
2572
2573 wd = inotify_add_watch(loop->inotify_fd, path, events);
2574 if (wd == -1)
2575 return UV__ERR(errno);
2576
2577 w = find_watcher(loop, wd);
2578 if (w)
2579 goto no_insert;
2580
2581 len = strlen(path) + 1;
2582 w = uv__malloc(sizeof(*w) + len);
2583 if (w == NULL)
2584 return UV_ENOMEM;
2585
2586 w->wd = wd;
2587 w->path = memcpy(w + 1, path, len);
2588 uv__queue_init(&w->watchers);
2589 w->iterating = 0;
2590 RB_INSERT(watcher_root, uv__inotify_watchers(loop), w);
2591
2592 no_insert:
2593 uv__handle_start(handle);
2594 uv__queue_insert_tail(&w->watchers, &handle->watchers);
2595 handle->path = w->path;
2596 handle->cb = cb;
2597 handle->wd = wd;
2598
2599 return 0;
2600 }
2601
2602
2603 int uv_fs_event_stop(uv_fs_event_t* handle) {
2604 struct watcher_list* w;
2605
2606 if (!uv__is_active(handle))
2607 return 0;
2608
2609 w = find_watcher(handle->loop, handle->wd);
2610 assert(w != NULL);
2611
2612 handle->wd = -1;
2613 handle->path = NULL;
2614 uv__handle_stop(handle);
2615 uv__queue_remove(&handle->watchers);
2616
2617 maybe_free_watcher_list(w, handle->loop);
2618
2619 return 0;
2620 }
2621
2622
2623 void uv__fs_event_close(uv_fs_event_t* handle) {
2624 uv_fs_event_stop(handle);
2625 }
2626
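#if 0
/* Minimal usage sketch (illustrative only, never compiled): watch a
 * directory with the inotify-backed uv_fs_event API, assuming a running
 * loop. The example_* names are placeholders, not libuv API.
 */
static void example_fs_event_cb(uv_fs_event_t* handle,
                                const char* filename,
                                int events,
                                int status) {
  if (status < 0)
    return;

  printf("%s:%s%s\n",
         filename != NULL ? filename : "(unknown)",
         (events & UV_RENAME) ? " rename" : "",
         (events & UV_CHANGE) ? " change" : "");
}

static int example_watch_tmp(uv_loop_t* loop) {
  static uv_fs_event_t handle;  /* static so the handle outlives this call */
  int err;

  err = uv_fs_event_init(loop, &handle);
  if (err == 0)
    err = uv_fs_event_start(&handle, example_fs_event_cb, "/tmp", 0);

  return err;
}
#endif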